xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e2fbb1ba6e3eace0c4c0036fdbc8a86f123a4fe5)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /*
   With TYPE set to AIJ (and TYPE_AIJ defined), the header included below defines
   MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash().
   Both macros are undefined again immediately after the include.
*/
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
115    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
124   Level: beginner
125 
126   Developer Note:
127   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`. The type also automatically switches over to use inodes when
128   enough exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
280 {
281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
282   PetscInt           i, m, n, *garray = aij->garray;
283   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
284   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
285   PetscReal         *work;
286   const PetscScalar *dummy;
287 
288   PetscFunctionBegin;
289   PetscCall(MatGetSize(A, &m, &n));
290   PetscCall(PetscCalloc1(n, &work));
291   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
292   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
294   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
295   if (type == NORM_2) {
296     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
297     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
298   } else if (type == NORM_1) {
299     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
300     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
301   } else if (type == NORM_INFINITY) {
302     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
303     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
304   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
305     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
306     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
307   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
308     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
309     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
310   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
311   if (type == NORM_INFINITY) {
312     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
313   } else {
314     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
315   }
316   PetscCall(PetscFree(work));
317   if (type == NORM_2) {
318     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
319   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
320     for (i = 0; i < n; i++) reductions[i] /= m;
321   }
322   PetscFunctionReturn(PETSC_SUCCESS);
323 }
324 
325 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
326 {
327   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
328   IS              sis, gis;
329   const PetscInt *isis, *igis;
330   PetscInt        n, *iis, nsis, ngis, rstart, i;
331 
332   PetscFunctionBegin;
333   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
334   PetscCall(MatFindNonzeroRows(a->B, &gis));
335   PetscCall(ISGetSize(gis, &ngis));
336   PetscCall(ISGetSize(sis, &nsis));
337   PetscCall(ISGetIndices(sis, &isis));
338   PetscCall(ISGetIndices(gis, &igis));
339 
340   PetscCall(PetscMalloc1(ngis + nsis, &iis));
341   PetscCall(PetscArraycpy(iis, igis, ngis));
342   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
343   n = ngis + nsis;
344   PetscCall(PetscSortRemoveDupsInt(&n, iis));
345   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
346   for (i = 0; i < n; i++) iis[i] += rstart;
347   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
348 
349   PetscCall(ISRestoreIndices(sis, &isis));
350   PetscCall(ISRestoreIndices(gis, &igis));
351   PetscCall(ISDestroy(&sis));
352   PetscCall(ISDestroy(&gis));
353   PetscFunctionReturn(PETSC_SUCCESS);
354 }
355 
356 /*
357   Local utility routine that creates a mapping from the global column
358 number to the local number in the off-diagonal part of the local
359 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
360 a slightly higher hash table cost; without it it is not scalable (each processor
361 has an order N integer array but is fast to access.
362 */
363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
364 {
365   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
366   PetscInt    n   = aij->B->cmap->n, i;
367 
368   PetscFunctionBegin;
369   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
370 #if defined(PETSC_USE_CTABLE)
371   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
372   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
373 #else
374   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
375   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
376 #endif
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts or adds one value into a row of the block referred to
  by the variables A / a in the expanding scope.

  Macro arguments:
    row, col   - row and column index used to address the row arrays
    value      - the value to store
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - row/column numbers used only in the error message

  Besides its arguments the macro reads and writes these variables of the expanding scope:
  rp1, ap1 (column indices and values of the row), nrow1 (entries in use), rmax1, low1, high1,
  lastcol1 (search window carried between expansions), t, _i, N, nonew, ignorezeroentries,
  A, a, am, aa, ai, aj, aimax and ailen.

  Steps:
    1. The search window [low1, high1) is widened on one side depending on how col compares with
       the previously handled column, narrowed by bisection until at most 5 entries remain, and
       then scanned linearly.
    2. If col is found, the value is added or overwritten and control jumps to a_noinsert.
    3. If col is not found, nothing is inserted when value is 0.0, ignorezeroentries is set and
       row != col, or when nonew == 1; nonew == -1 raises an error.
    4. Otherwise MatSeqXAIJReallocateAIJ() is invoked, the later entries of the row are shifted up
       by one, the new entry is stored at position _i, and a->nz and A->nonzerostate are incremented.
    5. In every case ailen[row] is set to nrow1.

  The macro defines the label a_noinsert, so it can be expanded only once per function.
*/
380 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
381   do { \
382     if (col <= lastcol1) low1 = 0; \
383     else high1 = nrow1; \
384     lastcol1 = col; \
385     while (high1 - low1 > 5) { \
386       t = (low1 + high1) / 2; \
387       if (rp1[t] > col) high1 = t; \
388       else low1 = t; \
389     } \
390     for (_i = low1; _i < high1; _i++) { \
391       if (rp1[_i] > col) break; \
392       if (rp1[_i] == col) { \
393         if (addv == ADD_VALUES) { \
394           ap1[_i] += value; \
395           /* Not sure whether LogFlops will slow down the code or not */ \
396           (void)PetscLogFlops(1.0); \
397         } else ap1[_i] = value; \
398         goto a_noinsert; \
399       } \
400     } \
401     if (value == 0.0 && ignorezeroentries && row != col) { \
402       low1  = 0; \
403       high1 = nrow1; \
404       goto a_noinsert; \
405     } \
406     if (nonew == 1) { \
407       low1  = 0; \
408       high1 = nrow1; \
409       goto a_noinsert; \
410     } \
411     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
412     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
413     N = nrow1++ - 1; \
414     a->nz++; \
415     high1++; \
416     /* shift up all the later entries in this row */ \
417     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
418     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
419     rp1[_i] = col; \
420     ap1[_i] = value; \
421     A->nonzerostate++; \
422   a_noinsert:; \
423     ailen[row] = nrow1; \
424   } while (0)
425 
/*
  MatSetValues_SeqAIJ_B_Private - Inserts or adds one value into a row of the block referred to
  by the variables B / b in the expanding scope.

  Macro arguments:
    row, col   - row and column index used to address the row arrays
    value      - the value to store
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - row/column numbers used only in the error message

  Besides its arguments the macro reads and writes these variables of the expanding scope:
  rp2, ap2 (column indices and values of the row), nrow2 (entries in use), rmax2, low2, high2,
  lastcol2 (search window carried between expansions), t, _i, N, nonew, ignorezeroentries,
  B, b, bm, ba, bi, bj, bimax and bilen.

  Steps:
    1. The search window [low2, high2) is widened on one side depending on how col compares with
       the previously handled column, narrowed by bisection until at most 5 entries remain, and
       then scanned linearly.
    2. If col is found, the value is added or overwritten and control jumps to b_noinsert.
    3. If col is not found, nothing is inserted when value is 0.0 and ignorezeroentries is set
       (there is no row != col exception here), or when nonew == 1; nonew == -1 raises an error.
    4. Otherwise MatSeqXAIJReallocateAIJ() is invoked, the later entries of the row are shifted up
       by one, the new entry is stored at position _i, and b->nz and B->nonzerostate are incremented.
    5. In every case bilen[row] is set to nrow2.

  The macro defines the label b_noinsert, so it can be expanded only once per function.
*/
426 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
427   do { \
428     if (col <= lastcol2) low2 = 0; \
429     else high2 = nrow2; \
430     lastcol2 = col; \
431     while (high2 - low2 > 5) { \
432       t = (low2 + high2) / 2; \
433       if (rp2[t] > col) high2 = t; \
434       else low2 = t; \
435     } \
436     for (_i = low2; _i < high2; _i++) { \
437       if (rp2[_i] > col) break; \
438       if (rp2[_i] == col) { \
439         if (addv == ADD_VALUES) { \
440           ap2[_i] += value; \
441           (void)PetscLogFlops(1.0); \
442         } else ap2[_i] = value; \
443         goto b_noinsert; \
444       } \
445     } \
446     if (value == 0.0 && ignorezeroentries) { \
447       low2  = 0; \
448       high2 = nrow2; \
449       goto b_noinsert; \
450     } \
451     if (nonew == 1) { \
452       low2  = 0; \
453       high2 = nrow2; \
454       goto b_noinsert; \
455     } \
456     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
457     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
458     N = nrow2++ - 1; \
459     b->nz++; \
460     high2++; \
461     /* shift up all the later entries in this row */ \
462     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
463     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
464     rp2[_i] = col; \
465     ap2[_i] = value; \
466     B->nonzerostate++; \
467   b_noinsert:; \
468     bilen[row] = nrow2; \
469   } while (0)
470 
471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
472 {
473   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
474   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
475   PetscInt     l, *garray                         = mat->garray, diag;
476   PetscScalar *aa, *ba;
477 
478   PetscFunctionBegin;
479   /* code only works for square matrices A */
480 
481   /* find size of row to the left of the diagonal part */
482   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
483   row = row - diag;
484   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
485     if (garray[b->j[b->i[row] + l]] > diag) break;
486   }
487   if (l) {
488     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
489     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
490     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
491   }
492 
493   /* diagonal part */
494   if (a->i[row + 1] - a->i[row]) {
495     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
496     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
497     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
498   }
499 
500   /* right of diagonal part */
501   if (b->i[row + 1] - b->i[row] - l) {
502     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
503     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
504     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
505   }
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
/*
  MatSetValues_MPIAIJ - Inserts or adds an m-by-n block of values, addressed by the global row
  indices im[] and the global column indices in[], into an MPIAIJ matrix.

  Input Parameters:
+ mat  - the MPIAIJ matrix
. m    - number of rows in the block
. im   - global row indices; negative entries are skipped
. n    - number of columns in the block
. in   - global column indices; negative entries are skipped
. v    - the values, read as v[i * n + j] when aij->roworiented is true and as v[i + j * m]
         otherwise; when v is NULL the value 0.0 is used for every entry
- addv - insert mode handed to the block-level macros and to the stash

  Rows in [rstart, rend) are handled directly: columns in [cstart, cend) go to aij->A through
  MatSetValues_SeqAIJ_A_Private(), every other column goes to aij->B through
  MatSetValues_SeqAIJ_B_Private(). Rows outside that range are put into mat->stash unless
  aij->donotstash is set, and raise an error when mat->nooffprocentries is set.

  Row or column indices that are too large raise PETSC_ERR_ARG_OUTOFRANGE.
*/
509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
510 {
511   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
512   PetscScalar value = 0.0;
513   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
514   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
515   PetscBool   roworiented = aij->roworiented;
516 
517   /* Some Variables required in the macro */
  /* The names below are fixed: both MatSetValues_SeqAIJ_*_Private() macros refer to them directly */
518   Mat         A     = aij->A;
519   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
520   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
521   PetscBool   ignorezeroentries = a->ignorezeroentries;
522   Mat         B                 = aij->B;
523   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
524   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
525   MatScalar  *aa, *ba;
526   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
527   PetscInt    nonew;
528   MatScalar  *ap1, *ap2;
529 
530   PetscFunctionBegin;
531   PetscCall(MatSeqAIJGetArray(A, &aa));
532   PetscCall(MatSeqAIJGetArray(B, &ba));
533   for (i = 0; i < m; i++) {
534     if (im[i] < 0) continue;
535     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
536     if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row state that the two insertion macros work on */
537       row      = im[i] - rstart;
538       lastcol1 = -1;
539       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
540       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
541       rmax1    = aimax[row];
542       nrow1    = ailen[row];
543       low1     = 0;
544       high1    = nrow1;
545       lastcol2 = -1;
546       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
547       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
548       rmax2    = bimax[row];
549       nrow2    = bilen[row];
550       low2     = 0;
551       high2    = nrow2;
552 
553       for (j = 0; j < n; j++) {
554         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        /* adding 0.0 at a position whose global row and column differ is skipped when zero entries are ignored */
555         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
556         if (in[j] >= cstart && in[j] < cend) {
          /* column belongs to the diagonal block; shift it by cstart */
557           col   = in[j] - cstart;
558           nonew = a->nonew;
559           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
560         } else if (in[j] < 0) {
561           continue;
562         } else {
563           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
564           if (mat->was_assembled) {
            /* after a previous assembly the column has to be translated through colmap, which is built on first use */
565             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
566 #if defined(PETSC_USE_CTABLE)
567             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
568             col--;
569 #else
570             col = aij->colmap[in[j]] - 1;
571 #endif
572             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
573               PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
574               col = in[j];
575               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
576               B     = aij->B;
577               b     = (Mat_SeqAIJ *)B->data;
578               bimax = b->imax;
579               bi    = b->i;
580               bilen = b->ilen;
581               bj    = b->j;
582               ba    = b->a;
583               rp2   = bj + bi[row];
584               ap2   = ba + bi[row];
585               rmax2 = bimax[row];
586               nrow2 = bilen[row];
587               low2  = 0;
588               high2 = nrow2;
589               bm    = aij->B->rmap->n;
              /* ba was already refreshed a few lines above; this second assignment stores the same pointer */
590               ba    = b->a;
591             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
592               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
593                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
594               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
595             }
596           } else col = in[j];
597           nonew = b->nonew;
598           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
599         }
600       }
601     } else {
      /* row owned by another process: stash the whole row unless stashing is disabled */
602       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
603       if (!aij->donotstash) {
604         mat->assembled = PETSC_FALSE;
605         if (roworiented) {
606           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
607         } else {
608           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
609         }
610       }
611     }
612   }
613   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been free'd due to reallocation above. But we don't access them here */
614   PetscCall(MatSeqAIJRestoreArray(B, &ba));
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
617 
618 /*
619     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
620     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
621     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
622 */
623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
624 {
625   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
626   Mat         A      = aij->A; /* diagonal part of the matrix */
627   Mat         B      = aij->B; /* off-diagonal part of the matrix */
628   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
629   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
630   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
631   PetscInt   *ailen = a->ilen, *aj = a->j;
632   PetscInt   *bilen = b->ilen, *bj = b->j;
633   PetscInt    am          = aij->A->rmap->n, j;
634   PetscInt    diag_so_far = 0, dnz;
635   PetscInt    offd_so_far = 0, onz;
636 
637   PetscFunctionBegin;
638   /* Iterate over all rows of the matrix */
639   for (j = 0; j < am; j++) {
640     dnz = onz = 0;
641     /*  Iterate over all non-zero columns of the current row */
642     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
643       /* If column is in the diagonal */
644       if (mat_j[col] >= cstart && mat_j[col] < cend) {
645         aj[diag_so_far++] = mat_j[col] - cstart;
646         dnz++;
647       } else { /* off-diagonal entries */
648         bj[offd_so_far++] = mat_j[col];
649         onz++;
650       }
651     }
652     ailen[j] = dnz;
653     bilen[j] = onz;
654   }
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 /*
659     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
660     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
661     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
662     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
663     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
664 */
665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
666 {
667   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
668   Mat          A    = aij->A; /* diagonal part of the matrix */
669   Mat          B    = aij->B; /* off-diagonal part of the matrix */
670   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
671   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
672   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
673   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
674   PetscInt    *ailen = a->ilen, *aj = a->j;
675   PetscInt    *bilen = b->ilen, *bj = b->j;
676   PetscInt     am          = aij->A->rmap->n, j;
677   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
678   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
679   PetscScalar *aa = a->a, *ba = b->a;
680 
681   PetscFunctionBegin;
682   /* Iterate over all rows of the matrix */
683   for (j = 0; j < am; j++) {
684     dnz_row = onz_row = 0;
685     rowstart_offd     = full_offd_i[j];
686     rowstart_diag     = full_diag_i[j];
687     /*  Iterate over all non-zero columns of the current row */
688     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
689       /* If column is in the diagonal */
690       if (mat_j[col] >= cstart && mat_j[col] < cend) {
691         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
692         aa[rowstart_diag + dnz_row] = mat_a[col];
693         dnz_row++;
694       } else { /* off-diagonal entries */
695         bj[rowstart_offd + onz_row] = mat_j[col];
696         ba[rowstart_offd + onz_row] = mat_a[col];
697         onz_row++;
698       }
699     }
700     ailen[j] = dnz_row;
701     bilen[j] = onz_row;
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
705 
706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
707 {
708   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
709   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
710   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
711 
712   PetscFunctionBegin;
713   for (i = 0; i < m; i++) {
714     if (idxm[i] < 0) continue; /* negative row */
715     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
716     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
717     row = idxm[i] - rstart;
718     for (j = 0; j < n; j++) {
719       if (idxn[j] < 0) continue; /* negative column */
720       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
721       if (idxn[j] >= cstart && idxn[j] < cend) {
722         col = idxn[j] - cstart;
723         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
724       } else {
725         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
726 #if defined(PETSC_USE_CTABLE)
727         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
728         col--;
729 #else
730         col = aij->colmap[idxn[j]] - 1;
731 #endif
732         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
733         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
734       }
735     }
736   }
737   PetscFunctionReturn(PETSC_SUCCESS);
738 }
739 
740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
741 {
742   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
743   PetscInt    nstash, reallocs;
744 
745   PetscFunctionBegin;
746   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
747 
748   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
749   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
750   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
751   PetscFunctionReturn(PETSC_SUCCESS);
752 }
753 
754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
755 {
756   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
757   PetscMPIInt  n;
758   PetscInt     i, j, rstart, ncols, flg;
759   PetscInt    *row, *col;
760   PetscBool    other_disassembled;
761   PetscScalar *val;
762 
763   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
764 
765   PetscFunctionBegin;
766   if (!aij->donotstash && !mat->nooffprocentries) {
767     while (1) {
768       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
769       if (!flg) break;
770 
771       for (i = 0; i < n;) {
772         /* Now identify the consecutive vals belonging to the same row */
773         for (j = i, rstart = row[j]; j < n; j++) {
774           if (row[j] != rstart) break;
775         }
776         if (j < n) ncols = j - i;
777         else ncols = n - i;
778         /* Now assemble all these values with a single function call */
779         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
780         i = j;
781       }
782     }
783     PetscCall(MatStashScatterEnd_Private(&mat->stash));
784   }
785 #if defined(PETSC_HAVE_DEVICE)
786   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
787   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
788   if (mat->boundtocpu) {
789     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
790     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
791   }
792 #endif
793   PetscCall(MatAssemblyBegin(aij->A, mode));
794   PetscCall(MatAssemblyEnd(aij->A, mode));
795 
796   /* determine if any processor has disassembled, if so we must
797      also disassemble ourself, in order that we may reassemble. */
798   /*
799      if nonzero structure of submatrix B cannot change then we know that
800      no processor disassembled thus we can skip this stuff
801   */
802   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
803     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
804     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
805       PetscCall(MatDisAssemble_MPIAIJ(mat));
806     }
807   }
808   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
809   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
810 #if defined(PETSC_HAVE_DEVICE)
811   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
812 #endif
813   PetscCall(MatAssemblyBegin(aij->B, mode));
814   PetscCall(MatAssemblyEnd(aij->B, mode));
815 
816   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
817 
818   aij->rowvalues = NULL;
819 
820   PetscCall(VecDestroy(&aij->diag));
821 
822   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
823   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
824     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
825     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
826   }
827 #if defined(PETSC_HAVE_DEVICE)
828   mat->offloadmask = PETSC_OFFLOAD_BOTH;
829 #endif
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
834 {
835   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
836 
837   PetscFunctionBegin;
838   PetscCall(MatZeroEntries(l->A));
839   PetscCall(MatZeroEntries(l->B));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
844 {
845   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
846   PetscInt   *lrows;
847   PetscInt    r, len;
848   PetscBool   cong;
849 
850   PetscFunctionBegin;
851   /* get locally owned rows */
852   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
853   PetscCall(MatHasCongruentLayouts(A, &cong));
854   /* fix right hand side if needed */
855   if (x && b) {
856     const PetscScalar *xx;
857     PetscScalar       *bb;
858 
859     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
860     PetscCall(VecGetArrayRead(x, &xx));
861     PetscCall(VecGetArray(b, &bb));
862     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
863     PetscCall(VecRestoreArrayRead(x, &xx));
864     PetscCall(VecRestoreArray(b, &bb));
865   }
866 
867   if (diag != 0.0 && cong) {
868     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
869     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
870   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
871     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
872     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
873     PetscInt    nnwA, nnwB;
874     PetscBool   nnzA, nnzB;
875 
876     nnwA = aijA->nonew;
877     nnwB = aijB->nonew;
878     nnzA = aijA->keepnonzeropattern;
879     nnzB = aijB->keepnonzeropattern;
880     if (!nnzA) {
881       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
882       aijA->nonew = 0;
883     }
884     if (!nnzB) {
885       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
886       aijB->nonew = 0;
887     }
888     /* Must zero here before the next loop */
889     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
890     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
891     for (r = 0; r < len; ++r) {
892       const PetscInt row = lrows[r] + A->rmap->rstart;
893       if (row >= A->cmap->N) continue;
894       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
895     }
896     aijA->nonew = nnwA;
897     aijB->nonew = nnwB;
898   } else {
899     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
900     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
901   }
902   PetscCall(PetscFree(lrows));
903   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
904   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
905 
906   /* only change matrix nonzero state if pattern was allowed to be changed */
907   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
908     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
909     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
910   }
911   PetscFunctionReturn(PETSC_SUCCESS);
912 }
913 
914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
915 {
916   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
917   PetscMPIInt        n = A->rmap->n;
918   PetscInt           i, j, r, m, len = 0;
919   PetscInt          *lrows, *owners = A->rmap->range;
920   PetscMPIInt        p = 0;
921   PetscSFNode       *rrows;
922   PetscSF            sf;
923   const PetscScalar *xx;
924   PetscScalar       *bb, *mask, *aij_a;
925   Vec                xmask, lmask;
926   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
927   const PetscInt    *aj, *ii, *ridx;
928   PetscScalar       *aa;
929 
930   PetscFunctionBegin;
931   /* Create SF where leaves are input rows and roots are owned rows */
932   PetscCall(PetscMalloc1(n, &lrows));
933   for (r = 0; r < n; ++r) lrows[r] = -1;
934   PetscCall(PetscMalloc1(N, &rrows));
935   for (r = 0; r < N; ++r) {
936     const PetscInt idx = rows[r];
937     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
938     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
939       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
940     }
941     rrows[r].rank  = p;
942     rrows[r].index = rows[r] - owners[p];
943   }
944   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
945   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
946   /* Collect flags for rows to be zeroed */
947   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
948   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
949   PetscCall(PetscSFDestroy(&sf));
950   /* Compress and put in row numbers */
951   for (r = 0; r < n; ++r)
952     if (lrows[r] >= 0) lrows[len++] = r;
953   /* zero diagonal part of matrix */
954   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
955   /* handle off-diagonal part of matrix */
956   PetscCall(MatCreateVecs(A, &xmask, NULL));
957   PetscCall(VecDuplicate(l->lvec, &lmask));
958   PetscCall(VecGetArray(xmask, &bb));
959   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
960   PetscCall(VecRestoreArray(xmask, &bb));
961   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
962   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
963   PetscCall(VecDestroy(&xmask));
964   if (x && b) { /* this code is buggy when the row and column layout don't match */
965     PetscBool cong;
966 
967     PetscCall(MatHasCongruentLayouts(A, &cong));
968     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
969     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
970     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
971     PetscCall(VecGetArrayRead(l->lvec, &xx));
972     PetscCall(VecGetArray(b, &bb));
973   }
974   PetscCall(VecGetArray(lmask, &mask));
975   /* remove zeroed rows of off-diagonal matrix */
976   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
977   ii = aij->i;
978   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
979   /* loop over all elements of off process part of matrix zeroing removed columns*/
980   if (aij->compressedrow.use) {
981     m    = aij->compressedrow.nrows;
982     ii   = aij->compressedrow.i;
983     ridx = aij->compressedrow.rindex;
984     for (i = 0; i < m; i++) {
985       n  = ii[i + 1] - ii[i];
986       aj = aij->j + ii[i];
987       aa = aij_a + ii[i];
988 
989       for (j = 0; j < n; j++) {
990         if (PetscAbsScalar(mask[*aj])) {
991           if (b) bb[*ridx] -= *aa * xx[*aj];
992           *aa = 0.0;
993         }
994         aa++;
995         aj++;
996       }
997       ridx++;
998     }
999   } else { /* do not use compressed row format */
1000     m = l->B->rmap->n;
1001     for (i = 0; i < m; i++) {
1002       n  = ii[i + 1] - ii[i];
1003       aj = aij->j + ii[i];
1004       aa = aij_a + ii[i];
1005       for (j = 0; j < n; j++) {
1006         if (PetscAbsScalar(mask[*aj])) {
1007           if (b) bb[i] -= *aa * xx[*aj];
1008           *aa = 0.0;
1009         }
1010         aa++;
1011         aj++;
1012       }
1013     }
1014   }
1015   if (x && b) {
1016     PetscCall(VecRestoreArray(b, &bb));
1017     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1018   }
1019   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1020   PetscCall(VecRestoreArray(lmask, &mask));
1021   PetscCall(VecDestroy(&lmask));
1022   PetscCall(PetscFree(lrows));
1023 
1024   /* only change matrix nonzero state if pattern was allowed to be changed */
1025   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1026     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1027     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1028   }
1029   PetscFunctionReturn(PETSC_SUCCESS);
1030 }
1031 
1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1033 {
1034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035   PetscInt    nt;
1036   VecScatter  Mvctx = a->Mvctx;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(VecGetLocalSize(xx, &nt));
1040   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1041   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->A, mult, xx, yy);
1043   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1044   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1045   PetscFunctionReturn(PETSC_SUCCESS);
1046 }
1047 
1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1049 {
1050   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1051 
1052   PetscFunctionBegin;
1053   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1058 {
1059   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1060   VecScatter  Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   /* do nondiagonal part */
1076   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1077   /* do local part */
1078   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1079   /* add partial results together */
1080   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1081   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1086 {
1087   MPI_Comm    comm;
1088   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1089   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1090   IS          Me, Notme;
1091   PetscInt    M, N, first, last, *notme, i;
1092   PetscBool   lf;
1093   PetscMPIInt size;
1094 
1095   PetscFunctionBegin;
1096   /* Easy test: symmetric diagonal block */
1097   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1098   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1099   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1100   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1101   PetscCallMPI(MPI_Comm_size(comm, &size));
1102   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1103 
1104   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1105   PetscCall(MatGetSize(Amat, &M, &N));
1106   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1107   PetscCall(PetscMalloc1(N - last + first, &notme));
1108   for (i = 0; i < first; i++) notme[i] = i;
1109   for (i = last; i < M; i++) notme[i - last + first] = i;
1110   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1111   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1112   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1113   Aoff = Aoffs[0];
1114   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1115   Boff = Boffs[0];
1116   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1117   PetscCall(MatDestroyMatrices(1, &Aoffs));
1118   PetscCall(MatDestroyMatrices(1, &Boffs));
1119   PetscCall(ISDestroy(&Me));
1120   PetscCall(ISDestroy(&Notme));
1121   PetscCall(PetscFree(notme));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1126 {
1127   PetscFunctionBegin;
1128   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1129   PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131 
1132 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1133 {
1134   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1135 
1136   PetscFunctionBegin;
1137   /* do nondiagonal part */
1138   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1139   /* do local part */
1140   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1141   /* add partial results together */
1142   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1143   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1144   PetscFunctionReturn(PETSC_SUCCESS);
1145 }
1146 
1147 /*
1148   This only works correctly for square matrices where the subblock A->A is the
1149    diagonal block
1150 */
1151 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1152 {
1153   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1154 
1155   PetscFunctionBegin;
1156   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1157   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1158   PetscCall(MatGetDiagonal(a->A, v));
1159   PetscFunctionReturn(PETSC_SUCCESS);
1160 }
1161 
1162 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1163 {
1164   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1165 
1166   PetscFunctionBegin;
1167   PetscCall(MatScale(a->A, aa));
1168   PetscCall(MatScale(a->B, aa));
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1173 {
1174   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1175   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1176   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1177   const PetscInt    *garray = aij->garray;
1178   const PetscScalar *aa, *ba;
1179   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1180   PetscInt64         nz, hnz;
1181   PetscInt          *rowlens;
1182   PetscInt          *colidxs;
1183   PetscScalar       *matvals;
1184   PetscMPIInt        rank;
1185 
1186   PetscFunctionBegin;
1187   PetscCall(PetscViewerSetUp(viewer));
1188 
1189   M  = mat->rmap->N;
1190   N  = mat->cmap->N;
1191   m  = mat->rmap->n;
1192   rs = mat->rmap->rstart;
1193   cs = mat->cmap->rstart;
1194   nz = A->nz + B->nz;
1195 
1196   /* write matrix header */
1197   header[0] = MAT_FILE_CLASSID;
1198   header[1] = M;
1199   header[2] = N;
1200   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1201   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1202   if (rank == 0) {
1203     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1204     else header[3] = (PetscInt)hnz;
1205   }
1206   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1207 
1208   /* fill in and store row lengths  */
1209   PetscCall(PetscMalloc1(m, &rowlens));
1210   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1211   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1212   PetscCall(PetscFree(rowlens));
1213 
1214   /* fill in and store column indices */
1215   PetscCall(PetscMalloc1(nz, &colidxs));
1216   for (cnt = 0, i = 0; i < m; i++) {
1217     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1218       if (garray[B->j[jb]] > cs) break;
1219       colidxs[cnt++] = garray[B->j[jb]];
1220     }
1221     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1222     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1223   }
1224   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1225   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1226   PetscCall(PetscFree(colidxs));
1227 
1228   /* fill in and store nonzero values */
1229   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1230   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1231   PetscCall(PetscMalloc1(nz, &matvals));
1232   for (cnt = 0, i = 0; i < m; i++) {
1233     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1234       if (garray[B->j[jb]] > cs) break;
1235       matvals[cnt++] = ba[jb];
1236     }
1237     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1238     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1239   }
1240   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1241   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1242   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1243   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1244   PetscCall(PetscFree(matvals));
1245 
1246   /* write block size option to the viewer's .info file */
1247   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1248   PetscFunctionReturn(PETSC_SUCCESS);
1249 }
1250 
1251 #include <petscdraw.h>
1252 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1253 {
1254   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1255   PetscMPIInt       rank = aij->rank, size = aij->size;
1256   PetscBool         isdraw, iascii, isbinary;
1257   PetscViewer       sviewer;
1258   PetscViewerFormat format;
1259 
1260   PetscFunctionBegin;
1261   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1262   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1263   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1264   if (iascii) {
1265     PetscCall(PetscViewerGetFormat(viewer, &format));
1266     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1267       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1268       PetscCall(PetscMalloc1(size, &nz));
1269       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1270       for (i = 0; i < (PetscInt)size; i++) {
1271         nmax = PetscMax(nmax, nz[i]);
1272         nmin = PetscMin(nmin, nz[i]);
1273         navg += nz[i];
1274       }
1275       PetscCall(PetscFree(nz));
1276       navg = navg / size;
1277       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1278       PetscFunctionReturn(PETSC_SUCCESS);
1279     }
1280     PetscCall(PetscViewerGetFormat(viewer, &format));
1281     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1282       MatInfo   info;
1283       PetscInt *inodes = NULL;
1284 
1285       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1286       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1287       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1288       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1289       if (!inodes) {
1290         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1291                                                      (double)info.memory));
1292       } else {
1293         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1294                                                      (double)info.memory));
1295       }
1296       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1297       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1298       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1299       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1300       PetscCall(PetscViewerFlush(viewer));
1301       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1302       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1303       PetscCall(VecScatterView(aij->Mvctx, viewer));
1304       PetscFunctionReturn(PETSC_SUCCESS);
1305     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1306       PetscInt inodecount, inodelimit, *inodes;
1307       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1308       if (inodes) {
1309         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1310       } else {
1311         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1312       }
1313       PetscFunctionReturn(PETSC_SUCCESS);
1314     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1315       PetscFunctionReturn(PETSC_SUCCESS);
1316     }
1317   } else if (isbinary) {
1318     if (size == 1) {
1319       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1320       PetscCall(MatView(aij->A, viewer));
1321     } else {
1322       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1323     }
1324     PetscFunctionReturn(PETSC_SUCCESS);
1325   } else if (iascii && size == 1) {
1326     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1327     PetscCall(MatView(aij->A, viewer));
1328     PetscFunctionReturn(PETSC_SUCCESS);
1329   } else if (isdraw) {
1330     PetscDraw draw;
1331     PetscBool isnull;
1332     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1333     PetscCall(PetscDrawIsNull(draw, &isnull));
1334     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1335   }
1336 
1337   { /* assemble the entire matrix onto first processor */
1338     Mat A = NULL, Av;
1339     IS  isrow, iscol;
1340 
1341     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1342     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1343     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1344     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1345     /*  The commented code uses MatCreateSubMatrices instead */
1346     /*
1347     Mat *AA, A = NULL, Av;
1348     IS  isrow,iscol;
1349 
1350     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1351     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1352     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1353     if (rank == 0) {
1354        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1355        A    = AA[0];
1356        Av   = AA[0];
1357     }
1358     PetscCall(MatDestroySubMatrices(1,&AA));
1359 */
1360     PetscCall(ISDestroy(&iscol));
1361     PetscCall(ISDestroy(&isrow));
1362     /*
1363        Everyone has to call to draw the matrix since the graphics waits are
1364        synchronized across all processors that share the PetscDraw object
1365     */
1366     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1367     if (rank == 0) {
1368       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1369       PetscCall(MatView_SeqAIJ(Av, sviewer));
1370     }
1371     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1372     PetscCall(MatDestroy(&A));
1373   }
1374   PetscFunctionReturn(PETSC_SUCCESS);
1375 }
1376 
1377 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1378 {
1379   PetscBool iascii, isdraw, issocket, isbinary;
1380 
1381   PetscFunctionBegin;
1382   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1383   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1384   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1385   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1386   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1387   PetscFunctionReturn(PETSC_SUCCESS);
1388 }
1389 
1390 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1391 {
1392   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1393   Vec         bb1 = NULL;
1394   PetscBool   hasop;
1395 
1396   PetscFunctionBegin;
1397   if (flag == SOR_APPLY_UPPER) {
1398     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1399     PetscFunctionReturn(PETSC_SUCCESS);
1400   }
1401 
1402   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1403 
1404   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1405     if (flag & SOR_ZERO_INITIAL_GUESS) {
1406       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1407       its--;
1408     }
1409 
1410     while (its--) {
1411       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1412       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1413 
1414       /* update rhs: bb1 = bb - B*x */
1415       PetscCall(VecScale(mat->lvec, -1.0));
1416       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1417 
1418       /* local sweep */
1419       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1420     }
1421   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1422     if (flag & SOR_ZERO_INITIAL_GUESS) {
1423       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1424       its--;
1425     }
1426     while (its--) {
1427       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1428       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1429 
1430       /* update rhs: bb1 = bb - B*x */
1431       PetscCall(VecScale(mat->lvec, -1.0));
1432       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1433 
1434       /* local sweep */
1435       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1436     }
1437   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1438     if (flag & SOR_ZERO_INITIAL_GUESS) {
1439       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1440       its--;
1441     }
1442     while (its--) {
1443       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1444       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1445 
1446       /* update rhs: bb1 = bb - B*x */
1447       PetscCall(VecScale(mat->lvec, -1.0));
1448       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1449 
1450       /* local sweep */
1451       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1452     }
1453   } else if (flag & SOR_EISENSTAT) {
1454     Vec xx1;
1455 
1456     PetscCall(VecDuplicate(bb, &xx1));
1457     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1458 
1459     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1460     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1461     if (!mat->diag) {
1462       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1463       PetscCall(MatGetDiagonal(matin, mat->diag));
1464     }
1465     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1466     if (hasop) {
1467       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1468     } else {
1469       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1470     }
1471     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1472 
1473     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1474 
1475     /* local sweep */
1476     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1477     PetscCall(VecAXPY(xx, 1.0, xx1));
1478     PetscCall(VecDestroy(&xx1));
1479   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1480 
1481   PetscCall(VecDestroy(&bb1));
1482 
1483   matin->factorerrortype = mat->A->factorerrortype;
1484   PetscFunctionReturn(PETSC_SUCCESS);
1485 }
1486 
1487 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1488 {
1489   Mat             aA, aB, Aperm;
1490   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1491   PetscScalar    *aa, *ba;
1492   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1493   PetscSF         rowsf, sf;
1494   IS              parcolp = NULL;
1495   PetscBool       done;
1496 
1497   PetscFunctionBegin;
1498   PetscCall(MatGetLocalSize(A, &m, &n));
1499   PetscCall(ISGetIndices(rowp, &rwant));
1500   PetscCall(ISGetIndices(colp, &cwant));
1501   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1502 
1503   /* Invert row permutation to find out where my rows should go */
1504   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1505   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1506   PetscCall(PetscSFSetFromOptions(rowsf));
1507   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1508   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1509   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1510 
1511   /* Invert column permutation to find out where my columns should go */
1512   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1513   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1514   PetscCall(PetscSFSetFromOptions(sf));
1515   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1516   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1517   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1518   PetscCall(PetscSFDestroy(&sf));
1519 
1520   PetscCall(ISRestoreIndices(rowp, &rwant));
1521   PetscCall(ISRestoreIndices(colp, &cwant));
1522   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1523 
1524   /* Find out where my gcols should go */
1525   PetscCall(MatGetSize(aB, NULL, &ng));
1526   PetscCall(PetscMalloc1(ng, &gcdest));
1527   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1528   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1529   PetscCall(PetscSFSetFromOptions(sf));
1530   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1531   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1532   PetscCall(PetscSFDestroy(&sf));
1533 
1534   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1535   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1536   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1537   for (i = 0; i < m; i++) {
1538     PetscInt    row = rdest[i];
1539     PetscMPIInt rowner;
1540     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1541     for (j = ai[i]; j < ai[i + 1]; j++) {
1542       PetscInt    col = cdest[aj[j]];
1543       PetscMPIInt cowner;
1544       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1545       if (rowner == cowner) dnnz[i]++;
1546       else onnz[i]++;
1547     }
1548     for (j = bi[i]; j < bi[i + 1]; j++) {
1549       PetscInt    col = gcdest[bj[j]];
1550       PetscMPIInt cowner;
1551       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1552       if (rowner == cowner) dnnz[i]++;
1553       else onnz[i]++;
1554     }
1555   }
1556   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1557   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1558   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1559   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1560   PetscCall(PetscSFDestroy(&rowsf));
1561 
1562   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1563   PetscCall(MatSeqAIJGetArray(aA, &aa));
1564   PetscCall(MatSeqAIJGetArray(aB, &ba));
1565   for (i = 0; i < m; i++) {
1566     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1567     PetscInt  j0, rowlen;
1568     rowlen = ai[i + 1] - ai[i];
1569     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1570       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1571       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1572     }
1573     rowlen = bi[i + 1] - bi[i];
1574     for (j0 = j = 0; j < rowlen; j0 = j) {
1575       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1576       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1577     }
1578   }
1579   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1580   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1581   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1582   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1583   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1584   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1585   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1586   PetscCall(PetscFree3(work, rdest, cdest));
1587   PetscCall(PetscFree(gcdest));
1588   if (parcolp) PetscCall(ISDestroy(&colp));
1589   *B = Aperm;
1590   PetscFunctionReturn(PETSC_SUCCESS);
1591 }
1592 
1593 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1594 {
1595   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1596 
1597   PetscFunctionBegin;
1598   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1599   if (ghosts) *ghosts = aij->garray;
1600   PetscFunctionReturn(PETSC_SUCCESS);
1601 }
1602 
1603 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1604 {
1605   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1606   Mat            A = mat->A, B = mat->B;
1607   PetscLogDouble isend[5], irecv[5];
1608 
1609   PetscFunctionBegin;
1610   info->block_size = 1.0;
1611   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1612 
1613   isend[0] = info->nz_used;
1614   isend[1] = info->nz_allocated;
1615   isend[2] = info->nz_unneeded;
1616   isend[3] = info->memory;
1617   isend[4] = info->mallocs;
1618 
1619   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1620 
1621   isend[0] += info->nz_used;
1622   isend[1] += info->nz_allocated;
1623   isend[2] += info->nz_unneeded;
1624   isend[3] += info->memory;
1625   isend[4] += info->mallocs;
1626   if (flag == MAT_LOCAL) {
1627     info->nz_used      = isend[0];
1628     info->nz_allocated = isend[1];
1629     info->nz_unneeded  = isend[2];
1630     info->memory       = isend[3];
1631     info->mallocs      = isend[4];
1632   } else if (flag == MAT_GLOBAL_MAX) {
1633     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1634 
1635     info->nz_used      = irecv[0];
1636     info->nz_allocated = irecv[1];
1637     info->nz_unneeded  = irecv[2];
1638     info->memory       = irecv[3];
1639     info->mallocs      = irecv[4];
1640   } else if (flag == MAT_GLOBAL_SUM) {
1641     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1642 
1643     info->nz_used      = irecv[0];
1644     info->nz_allocated = irecv[1];
1645     info->nz_unneeded  = irecv[2];
1646     info->memory       = irecv[3];
1647     info->mallocs      = irecv[4];
1648   }
1649   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1650   info->fill_ratio_needed = 0;
1651   info->factor_mallocs    = 0;
1652   PetscFunctionReturn(PETSC_SUCCESS);
1653 }
1654 
1655 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1656 {
1657   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1658 
1659   PetscFunctionBegin;
1660   switch (op) {
1661   case MAT_NEW_NONZERO_LOCATIONS:
1662   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1663   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1664   case MAT_KEEP_NONZERO_PATTERN:
1665   case MAT_NEW_NONZERO_LOCATION_ERR:
1666   case MAT_USE_INODES:
1667   case MAT_IGNORE_ZERO_ENTRIES:
1668   case MAT_FORM_EXPLICIT_TRANSPOSE:
1669     MatCheckPreallocated(A, 1);
1670     PetscCall(MatSetOption(a->A, op, flg));
1671     PetscCall(MatSetOption(a->B, op, flg));
1672     break;
1673   case MAT_ROW_ORIENTED:
1674     MatCheckPreallocated(A, 1);
1675     a->roworiented = flg;
1676 
1677     PetscCall(MatSetOption(a->A, op, flg));
1678     PetscCall(MatSetOption(a->B, op, flg));
1679     break;
1680   case MAT_FORCE_DIAGONAL_ENTRIES:
1681   case MAT_SORTED_FULL:
1682     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1683     break;
1684   case MAT_IGNORE_OFF_PROC_ENTRIES:
1685     a->donotstash = flg;
1686     break;
1687   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1688   case MAT_SPD:
1689   case MAT_SYMMETRIC:
1690   case MAT_STRUCTURALLY_SYMMETRIC:
1691   case MAT_HERMITIAN:
1692   case MAT_SYMMETRY_ETERNAL:
1693   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1694   case MAT_SPD_ETERNAL:
1695     /* if the diagonal matrix is square it inherits some of the properties above */
1696     break;
1697   case MAT_SUBMAT_SINGLEIS:
1698     A->submat_singleis = flg;
1699     break;
1700   case MAT_STRUCTURE_ONLY:
1701     /* The option is handled directly by MatSetOption() */
1702     break;
1703   default:
1704     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1705   }
1706   PetscFunctionReturn(PETSC_SUCCESS);
1707 }
1708 
1709 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1710 {
1711   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1712   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1713   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1714   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1715   PetscInt    *cmap, *idx_p;
1716 
1717   PetscFunctionBegin;
1718   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1719   mat->getrowactive = PETSC_TRUE;
1720 
1721   if (!mat->rowvalues && (idx || v)) {
1722     /*
1723         allocate enough space to hold information from the longest row.
1724     */
1725     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1726     PetscInt    max = 1, tmp;
1727     for (i = 0; i < matin->rmap->n; i++) {
1728       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1729       if (max < tmp) max = tmp;
1730     }
1731     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1732   }
1733 
1734   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1735   lrow = row - rstart;
1736 
1737   pvA = &vworkA;
1738   pcA = &cworkA;
1739   pvB = &vworkB;
1740   pcB = &cworkB;
1741   if (!v) {
1742     pvA = NULL;
1743     pvB = NULL;
1744   }
1745   if (!idx) {
1746     pcA = NULL;
1747     if (!v) pcB = NULL;
1748   }
1749   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1750   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1751   nztot = nzA + nzB;
1752 
1753   cmap = mat->garray;
1754   if (v || idx) {
1755     if (nztot) {
1756       /* Sort by increasing column numbers, assuming A and B already sorted */
1757       PetscInt imark = -1;
1758       if (v) {
1759         *v = v_p = mat->rowvalues;
1760         for (i = 0; i < nzB; i++) {
1761           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1762           else break;
1763         }
1764         imark = i;
1765         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1766         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1767       }
1768       if (idx) {
1769         *idx = idx_p = mat->rowindices;
1770         if (imark > -1) {
1771           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1772         } else {
1773           for (i = 0; i < nzB; i++) {
1774             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1775             else break;
1776           }
1777           imark = i;
1778         }
1779         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1780         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1781       }
1782     } else {
1783       if (idx) *idx = NULL;
1784       if (v) *v = NULL;
1785     }
1786   }
1787   *nz = nztot;
1788   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1789   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1790   PetscFunctionReturn(PETSC_SUCCESS);
1791 }
1792 
1793 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1794 {
1795   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1796 
1797   PetscFunctionBegin;
1798   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1799   aij->getrowactive = PETSC_FALSE;
1800   PetscFunctionReturn(PETSC_SUCCESS);
1801 }
1802 
1803 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1804 {
1805   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1806   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1807   PetscInt         i, j, cstart = mat->cmap->rstart;
1808   PetscReal        sum = 0.0;
1809   const MatScalar *v, *amata, *bmata;
1810 
1811   PetscFunctionBegin;
1812   if (aij->size == 1) {
1813     PetscCall(MatNorm(aij->A, type, norm));
1814   } else {
1815     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1816     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1817     if (type == NORM_FROBENIUS) {
1818       v = amata;
1819       for (i = 0; i < amat->nz; i++) {
1820         sum += PetscRealPart(PetscConj(*v) * (*v));
1821         v++;
1822       }
1823       v = bmata;
1824       for (i = 0; i < bmat->nz; i++) {
1825         sum += PetscRealPart(PetscConj(*v) * (*v));
1826         v++;
1827       }
1828       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1829       *norm = PetscSqrtReal(*norm);
1830       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1831     } else if (type == NORM_1) { /* max column norm */
1832       PetscReal *tmp, *tmp2;
1833       PetscInt  *jj, *garray = aij->garray;
1834       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1835       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1836       *norm = 0.0;
1837       v     = amata;
1838       jj    = amat->j;
1839       for (j = 0; j < amat->nz; j++) {
1840         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1841         v++;
1842       }
1843       v  = bmata;
1844       jj = bmat->j;
1845       for (j = 0; j < bmat->nz; j++) {
1846         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1847         v++;
1848       }
1849       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1850       for (j = 0; j < mat->cmap->N; j++) {
1851         if (tmp2[j] > *norm) *norm = tmp2[j];
1852       }
1853       PetscCall(PetscFree(tmp));
1854       PetscCall(PetscFree(tmp2));
1855       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1856     } else if (type == NORM_INFINITY) { /* max row norm */
1857       PetscReal ntemp = 0.0;
1858       for (j = 0; j < aij->A->rmap->n; j++) {
1859         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1860         sum = 0.0;
1861         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1862           sum += PetscAbsScalar(*v);
1863           v++;
1864         }
1865         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1866         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1867           sum += PetscAbsScalar(*v);
1868           v++;
1869         }
1870         if (sum > ntemp) ntemp = sum;
1871       }
1872       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1873       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1874     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1875     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1876     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1877   }
1878   PetscFunctionReturn(PETSC_SUCCESS);
1879 }
1880 
1881 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1882 {
1883   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1884   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1885   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1886   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1887   Mat              B, A_diag, *B_diag;
1888   const MatScalar *pbv, *bv;
1889 
1890   PetscFunctionBegin;
1891   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1892   ma = A->rmap->n;
1893   na = A->cmap->n;
1894   mb = a->B->rmap->n;
1895   nb = a->B->cmap->n;
1896   ai = Aloc->i;
1897   aj = Aloc->j;
1898   bi = Bloc->i;
1899   bj = Bloc->j;
1900   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1901     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1902     PetscSFNode         *oloc;
1903     PETSC_UNUSED PetscSF sf;
1904 
1905     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1906     /* compute d_nnz for preallocation */
1907     PetscCall(PetscArrayzero(d_nnz, na));
1908     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1909     /* compute local off-diagonal contributions */
1910     PetscCall(PetscArrayzero(g_nnz, nb));
1911     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1912     /* map those to global */
1913     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1914     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1915     PetscCall(PetscSFSetFromOptions(sf));
1916     PetscCall(PetscArrayzero(o_nnz, na));
1917     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1918     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1919     PetscCall(PetscSFDestroy(&sf));
1920 
1921     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1922     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1923     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1924     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1925     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1926     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1927   } else {
1928     B = *matout;
1929     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1930   }
1931 
1932   b           = (Mat_MPIAIJ *)B->data;
1933   A_diag      = a->A;
1934   B_diag      = &b->A;
1935   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1936   A_diag_ncol = A_diag->cmap->N;
1937   B_diag_ilen = sub_B_diag->ilen;
1938   B_diag_i    = sub_B_diag->i;
1939 
1940   /* Set ilen for diagonal of B */
1941   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1942 
1943   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1944   very quickly (=without using MatSetValues), because all writes are local. */
1945   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1946   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1947 
1948   /* copy over the B part */
1949   PetscCall(PetscMalloc1(bi[mb], &cols));
1950   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1951   pbv = bv;
1952   row = A->rmap->rstart;
1953   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1954   cols_tmp = cols;
1955   for (i = 0; i < mb; i++) {
1956     ncol = bi[i + 1] - bi[i];
1957     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1958     row++;
1959     if (pbv) pbv += ncol;
1960     if (cols_tmp) cols_tmp += ncol;
1961   }
1962   PetscCall(PetscFree(cols));
1963   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1964 
1965   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1966   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1967   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1968     *matout = B;
1969   } else {
1970     PetscCall(MatHeaderMerge(A, &B));
1971   }
1972   PetscFunctionReturn(PETSC_SUCCESS);
1973 }
1974 
1975 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1976 {
1977   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1978   Mat         a = aij->A, b = aij->B;
1979   PetscInt    s1, s2, s3;
1980 
1981   PetscFunctionBegin;
1982   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1983   if (rr) {
1984     PetscCall(VecGetLocalSize(rr, &s1));
1985     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1986     /* Overlap communication with computation. */
1987     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1988   }
1989   if (ll) {
1990     PetscCall(VecGetLocalSize(ll, &s1));
1991     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1992     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1993   }
1994   /* scale  the diagonal block */
1995   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1996 
1997   if (rr) {
1998     /* Do a scatter end and then right scale the off-diagonal block */
1999     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2000     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2001   }
2002   PetscFunctionReturn(PETSC_SUCCESS);
2003 }
2004 
2005 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2006 {
2007   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2008 
2009   PetscFunctionBegin;
2010   PetscCall(MatSetUnfactored(a->A));
2011   PetscFunctionReturn(PETSC_SUCCESS);
2012 }
2013 
2014 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2015 {
2016   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2017   Mat         a, b, c, d;
2018   PetscBool   flg;
2019 
2020   PetscFunctionBegin;
2021   a = matA->A;
2022   b = matA->B;
2023   c = matB->A;
2024   d = matB->B;
2025 
2026   PetscCall(MatEqual(a, c, &flg));
2027   if (flg) PetscCall(MatEqual(b, d, &flg));
2028   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2029   PetscFunctionReturn(PETSC_SUCCESS);
2030 }
2031 
2032 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2033 {
2034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2035   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2036 
2037   PetscFunctionBegin;
2038   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2039   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2040     /* because of the column compression in the off-processor part of the matrix a->B,
2041        the number of columns in a->B and b->B may be different, hence we cannot call
2042        the MatCopy() directly on the two parts. If need be, we can provide a more
2043        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2044        then copying the submatrices */
2045     PetscCall(MatCopy_Basic(A, B, str));
2046   } else {
2047     PetscCall(MatCopy(a->A, b->A, str));
2048     PetscCall(MatCopy(a->B, b->B, str));
2049   }
2050   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2051   PetscFunctionReturn(PETSC_SUCCESS);
2052 }
2053 
2054 /*
2055    Computes the number of nonzeros per row needed for preallocation when X and Y
2056    have different nonzero structure.
2057 */
2058 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2059 {
2060   PetscInt i, j, k, nzx, nzy;
2061 
2062   PetscFunctionBegin;
2063   /* Set the number of nonzeros in the new matrix */
2064   for (i = 0; i < m; i++) {
2065     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2066     nzx    = xi[i + 1] - xi[i];
2067     nzy    = yi[i + 1] - yi[i];
2068     nnz[i] = 0;
2069     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2070       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2071       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2072       nnz[i]++;
2073     }
2074     for (; k < nzy; k++) nnz[i]++;
2075   }
2076   PetscFunctionReturn(PETSC_SUCCESS);
2077 }
2078 
2079 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2080 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2081 {
2082   PetscInt    m = Y->rmap->N;
2083   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2084   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2085 
2086   PetscFunctionBegin;
2087   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2088   PetscFunctionReturn(PETSC_SUCCESS);
2089 }
2090 
2091 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2092 {
2093   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2094 
2095   PetscFunctionBegin;
2096   if (str == SAME_NONZERO_PATTERN) {
2097     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2098     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2099   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2100     PetscCall(MatAXPY_Basic(Y, a, X, str));
2101   } else {
2102     Mat       B;
2103     PetscInt *nnz_d, *nnz_o;
2104 
2105     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2106     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2107     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2108     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2109     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2110     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2111     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2112     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2113     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2114     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2115     PetscCall(MatHeaderMerge(Y, &B));
2116     PetscCall(PetscFree(nnz_d));
2117     PetscCall(PetscFree(nnz_o));
2118   }
2119   PetscFunctionReturn(PETSC_SUCCESS);
2120 }
2121 
2122 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2123 
2124 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2125 {
2126   PetscFunctionBegin;
2127   if (PetscDefined(USE_COMPLEX)) {
2128     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2129 
2130     PetscCall(MatConjugate_SeqAIJ(aij->A));
2131     PetscCall(MatConjugate_SeqAIJ(aij->B));
2132   }
2133   PetscFunctionReturn(PETSC_SUCCESS);
2134 }
2135 
2136 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2137 {
2138   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2139 
2140   PetscFunctionBegin;
2141   PetscCall(MatRealPart(a->A));
2142   PetscCall(MatRealPart(a->B));
2143   PetscFunctionReturn(PETSC_SUCCESS);
2144 }
2145 
2146 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2147 {
2148   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2149 
2150   PetscFunctionBegin;
2151   PetscCall(MatImaginaryPart(a->A));
2152   PetscCall(MatImaginaryPart(a->B));
2153   PetscFunctionReturn(PETSC_SUCCESS);
2154 }
2155 
2156 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2157 {
2158   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2159   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2160   PetscScalar       *va, *vv;
2161   Vec                vB, vA;
2162   const PetscScalar *vb;
2163 
2164   PetscFunctionBegin;
2165   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2166   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2167 
2168   PetscCall(VecGetArrayWrite(vA, &va));
2169   if (idx) {
2170     for (i = 0; i < m; i++) {
2171       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2172     }
2173   }
2174 
2175   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2176   PetscCall(PetscMalloc1(m, &idxb));
2177   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2178 
2179   PetscCall(VecGetArrayWrite(v, &vv));
2180   PetscCall(VecGetArrayRead(vB, &vb));
2181   for (i = 0; i < m; i++) {
2182     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2183       vv[i] = vb[i];
2184       if (idx) idx[i] = a->garray[idxb[i]];
2185     } else {
2186       vv[i] = va[i];
2187       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2188     }
2189   }
2190   PetscCall(VecRestoreArrayWrite(vA, &vv));
2191   PetscCall(VecRestoreArrayWrite(vA, &va));
2192   PetscCall(VecRestoreArrayRead(vB, &vb));
2193   PetscCall(PetscFree(idxb));
2194   PetscCall(VecDestroy(&vA));
2195   PetscCall(VecDestroy(&vB));
2196   PetscFunctionReturn(PETSC_SUCCESS);
2197 }
2198 
2199 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2200 {
2201   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2202   PetscInt    m = A->rmap->n;
2203   Vec         vB, vA;
2204 
2205   PetscFunctionBegin;
2206   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2207   PetscCall(MatGetRowSumAbs(a->A, vA));
2208   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2209   PetscCall(MatGetRowSumAbs(a->B, vB));
2210   PetscCall(VecAXPY(vA, 1.0, vB));
2211   PetscCall(VecDestroy(&vB));
2212   PetscCall(VecCopy(vA, v));
2213   PetscCall(VecDestroy(&vA));
2214   PetscFunctionReturn(PETSC_SUCCESS);
2215 }
2216 
2217 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2218 {
2219   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2220   PetscInt           m = A->rmap->n, n = A->cmap->n;
2221   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2222   PetscInt          *cmap = mat->garray;
2223   PetscInt          *diagIdx, *offdiagIdx;
2224   Vec                diagV, offdiagV;
2225   PetscScalar       *a, *diagA, *offdiagA;
2226   const PetscScalar *ba, *bav;
2227   PetscInt           r, j, col, ncols, *bi, *bj;
2228   Mat                B = mat->B;
2229   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2230 
2231   PetscFunctionBegin;
2232   /* When a process holds entire A and other processes have no entry */
2233   if (A->cmap->N == n) {
2234     PetscCall(VecGetArrayWrite(v, &diagA));
2235     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2236     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2237     PetscCall(VecDestroy(&diagV));
2238     PetscCall(VecRestoreArrayWrite(v, &diagA));
2239     PetscFunctionReturn(PETSC_SUCCESS);
2240   } else if (n == 0) {
2241     if (m) {
2242       PetscCall(VecGetArrayWrite(v, &a));
2243       for (r = 0; r < m; r++) {
2244         a[r] = 0.0;
2245         if (idx) idx[r] = -1;
2246       }
2247       PetscCall(VecRestoreArrayWrite(v, &a));
2248     }
2249     PetscFunctionReturn(PETSC_SUCCESS);
2250   }
2251 
2252   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2253   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2254   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2255   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2256 
2257   /* Get offdiagIdx[] for implicit 0.0 */
2258   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2259   ba = bav;
2260   bi = b->i;
2261   bj = b->j;
2262   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2263   for (r = 0; r < m; r++) {
2264     ncols = bi[r + 1] - bi[r];
2265     if (ncols == A->cmap->N - n) { /* Brow is dense */
2266       offdiagA[r]   = *ba;
2267       offdiagIdx[r] = cmap[0];
2268     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2269       offdiagA[r] = 0.0;
2270 
2271       /* Find first hole in the cmap */
2272       for (j = 0; j < ncols; j++) {
2273         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2274         if (col > j && j < cstart) {
2275           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2276           break;
2277         } else if (col > j + n && j >= cstart) {
2278           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2279           break;
2280         }
2281       }
2282       if (j == ncols && ncols < A->cmap->N - n) {
2283         /* a hole is outside compressed Bcols */
2284         if (ncols == 0) {
2285           if (cstart) {
2286             offdiagIdx[r] = 0;
2287           } else offdiagIdx[r] = cend;
2288         } else { /* ncols > 0 */
2289           offdiagIdx[r] = cmap[ncols - 1] + 1;
2290           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2291         }
2292       }
2293     }
2294 
2295     for (j = 0; j < ncols; j++) {
2296       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2297         offdiagA[r]   = *ba;
2298         offdiagIdx[r] = cmap[*bj];
2299       }
2300       ba++;
2301       bj++;
2302     }
2303   }
2304 
2305   PetscCall(VecGetArrayWrite(v, &a));
2306   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2307   for (r = 0; r < m; ++r) {
2308     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2309       a[r] = diagA[r];
2310       if (idx) idx[r] = cstart + diagIdx[r];
2311     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2312       a[r] = diagA[r];
2313       if (idx) {
2314         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2315           idx[r] = cstart + diagIdx[r];
2316         } else idx[r] = offdiagIdx[r];
2317       }
2318     } else {
2319       a[r] = offdiagA[r];
2320       if (idx) idx[r] = offdiagIdx[r];
2321     }
2322   }
2323   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2324   PetscCall(VecRestoreArrayWrite(v, &a));
2325   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2326   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2327   PetscCall(VecDestroy(&diagV));
2328   PetscCall(VecDestroy(&offdiagV));
2329   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2330   PetscFunctionReturn(PETSC_SUCCESS);
2331 }
2332 
2333 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2334 {
2335   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2336   PetscInt           m = A->rmap->n, n = A->cmap->n;
2337   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2338   PetscInt          *cmap = mat->garray;
2339   PetscInt          *diagIdx, *offdiagIdx;
2340   Vec                diagV, offdiagV;
2341   PetscScalar       *a, *diagA, *offdiagA;
2342   const PetscScalar *ba, *bav;
2343   PetscInt           r, j, col, ncols, *bi, *bj;
2344   Mat                B = mat->B;
2345   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2346 
2347   PetscFunctionBegin;
2348   /* When a process holds entire A and other processes have no entry */
2349   if (A->cmap->N == n) {
2350     PetscCall(VecGetArrayWrite(v, &diagA));
2351     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2352     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2353     PetscCall(VecDestroy(&diagV));
2354     PetscCall(VecRestoreArrayWrite(v, &diagA));
2355     PetscFunctionReturn(PETSC_SUCCESS);
2356   } else if (n == 0) {
2357     if (m) {
2358       PetscCall(VecGetArrayWrite(v, &a));
2359       for (r = 0; r < m; r++) {
2360         a[r] = PETSC_MAX_REAL;
2361         if (idx) idx[r] = -1;
2362       }
2363       PetscCall(VecRestoreArrayWrite(v, &a));
2364     }
2365     PetscFunctionReturn(PETSC_SUCCESS);
2366   }
2367 
2368   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2369   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2370   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2371   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2372 
2373   /* Get offdiagIdx[] for implicit 0.0 */
2374   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2375   ba = bav;
2376   bi = b->i;
2377   bj = b->j;
2378   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2379   for (r = 0; r < m; r++) {
2380     ncols = bi[r + 1] - bi[r];
2381     if (ncols == A->cmap->N - n) { /* Brow is dense */
2382       offdiagA[r]   = *ba;
2383       offdiagIdx[r] = cmap[0];
2384     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2385       offdiagA[r] = 0.0;
2386 
2387       /* Find first hole in the cmap */
2388       for (j = 0; j < ncols; j++) {
2389         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2390         if (col > j && j < cstart) {
2391           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2392           break;
2393         } else if (col > j + n && j >= cstart) {
2394           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2395           break;
2396         }
2397       }
2398       if (j == ncols && ncols < A->cmap->N - n) {
2399         /* a hole is outside compressed Bcols */
2400         if (ncols == 0) {
2401           if (cstart) {
2402             offdiagIdx[r] = 0;
2403           } else offdiagIdx[r] = cend;
2404         } else { /* ncols > 0 */
2405           offdiagIdx[r] = cmap[ncols - 1] + 1;
2406           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2407         }
2408       }
2409     }
2410 
2411     for (j = 0; j < ncols; j++) {
2412       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2413         offdiagA[r]   = *ba;
2414         offdiagIdx[r] = cmap[*bj];
2415       }
2416       ba++;
2417       bj++;
2418     }
2419   }
2420 
2421   PetscCall(VecGetArrayWrite(v, &a));
2422   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2423   for (r = 0; r < m; ++r) {
2424     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2425       a[r] = diagA[r];
2426       if (idx) idx[r] = cstart + diagIdx[r];
2427     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2428       a[r] = diagA[r];
2429       if (idx) {
2430         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2431           idx[r] = cstart + diagIdx[r];
2432         } else idx[r] = offdiagIdx[r];
2433       }
2434     } else {
2435       a[r] = offdiagA[r];
2436       if (idx) idx[r] = offdiagIdx[r];
2437     }
2438   }
2439   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2440   PetscCall(VecRestoreArrayWrite(v, &a));
2441   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2442   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2443   PetscCall(VecDestroy(&diagV));
2444   PetscCall(VecDestroy(&offdiagV));
2445   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2446   PetscFunctionReturn(PETSC_SUCCESS);
2447 }
2448 
2449 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2450 {
2451   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2452   PetscInt           m = A->rmap->n, n = A->cmap->n;
2453   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2454   PetscInt          *cmap = mat->garray;
2455   PetscInt          *diagIdx, *offdiagIdx;
2456   Vec                diagV, offdiagV;
2457   PetscScalar       *a, *diagA, *offdiagA;
2458   const PetscScalar *ba, *bav;
2459   PetscInt           r, j, col, ncols, *bi, *bj;
2460   Mat                B = mat->B;
2461   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2462 
2463   PetscFunctionBegin;
2464   /* When a process holds entire A and other processes have no entry */
2465   if (A->cmap->N == n) {
2466     PetscCall(VecGetArrayWrite(v, &diagA));
2467     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2468     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2469     PetscCall(VecDestroy(&diagV));
2470     PetscCall(VecRestoreArrayWrite(v, &diagA));
2471     PetscFunctionReturn(PETSC_SUCCESS);
2472   } else if (n == 0) {
2473     if (m) {
2474       PetscCall(VecGetArrayWrite(v, &a));
2475       for (r = 0; r < m; r++) {
2476         a[r] = PETSC_MIN_REAL;
2477         if (idx) idx[r] = -1;
2478       }
2479       PetscCall(VecRestoreArrayWrite(v, &a));
2480     }
2481     PetscFunctionReturn(PETSC_SUCCESS);
2482   }
2483 
2484   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2485   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2486   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2487   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2488 
2489   /* Get offdiagIdx[] for implicit 0.0 */
2490   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2491   ba = bav;
2492   bi = b->i;
2493   bj = b->j;
2494   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2495   for (r = 0; r < m; r++) {
2496     ncols = bi[r + 1] - bi[r];
2497     if (ncols == A->cmap->N - n) { /* Brow is dense */
2498       offdiagA[r]   = *ba;
2499       offdiagIdx[r] = cmap[0];
2500     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2501       offdiagA[r] = 0.0;
2502 
2503       /* Find first hole in the cmap */
2504       for (j = 0; j < ncols; j++) {
2505         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2506         if (col > j && j < cstart) {
2507           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2508           break;
2509         } else if (col > j + n && j >= cstart) {
2510           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2511           break;
2512         }
2513       }
2514       if (j == ncols && ncols < A->cmap->N - n) {
2515         /* a hole is outside compressed Bcols */
2516         if (ncols == 0) {
2517           if (cstart) {
2518             offdiagIdx[r] = 0;
2519           } else offdiagIdx[r] = cend;
2520         } else { /* ncols > 0 */
2521           offdiagIdx[r] = cmap[ncols - 1] + 1;
2522           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2523         }
2524       }
2525     }
2526 
2527     for (j = 0; j < ncols; j++) {
2528       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2529         offdiagA[r]   = *ba;
2530         offdiagIdx[r] = cmap[*bj];
2531       }
2532       ba++;
2533       bj++;
2534     }
2535   }
2536 
2537   PetscCall(VecGetArrayWrite(v, &a));
2538   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2539   for (r = 0; r < m; ++r) {
2540     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2541       a[r] = diagA[r];
2542       if (idx) idx[r] = cstart + diagIdx[r];
2543     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2544       a[r] = diagA[r];
2545       if (idx) {
2546         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2547           idx[r] = cstart + diagIdx[r];
2548         } else idx[r] = offdiagIdx[r];
2549       }
2550     } else {
2551       a[r] = offdiagA[r];
2552       if (idx) idx[r] = offdiagIdx[r];
2553     }
2554   }
2555   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2556   PetscCall(VecRestoreArrayWrite(v, &a));
2557   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2558   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2559   PetscCall(VecDestroy(&diagV));
2560   PetscCall(VecDestroy(&offdiagV));
2561   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2562   PetscFunctionReturn(PETSC_SUCCESS);
2563 }
2564 
2565 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2566 {
2567   Mat *dummy;
2568 
2569   PetscFunctionBegin;
2570   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2571   *newmat = *dummy;
2572   PetscCall(PetscFree(dummy));
2573   PetscFunctionReturn(PETSC_SUCCESS);
2574 }
2575 
2576 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2577 {
2578   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2579 
2580   PetscFunctionBegin;
2581   PetscCall(MatInvertBlockDiagonal(a->A, values));
2582   A->factorerrortype = a->A->factorerrortype;
2583   PetscFunctionReturn(PETSC_SUCCESS);
2584 }
2585 
2586 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2587 {
2588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2589 
2590   PetscFunctionBegin;
2591   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2592   PetscCall(MatSetRandom(aij->A, rctx));
2593   if (x->assembled) {
2594     PetscCall(MatSetRandom(aij->B, rctx));
2595   } else {
2596     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2597   }
2598   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2599   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2600   PetscFunctionReturn(PETSC_SUCCESS);
2601 }
2602 
2603 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2604 {
2605   PetscFunctionBegin;
2606   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2607   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2608   PetscFunctionReturn(PETSC_SUCCESS);
2609 }
2610 
2611 /*@
2612   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2613 
2614   Not Collective
2615 
2616   Input Parameter:
2617 . A - the matrix
2618 
2619   Output Parameter:
2620 . nz - the number of nonzeros
2621 
2622   Level: advanced
2623 
2624 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2625 @*/
2626 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2627 {
2628   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2629   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2630   PetscBool   isaij;
2631 
2632   PetscFunctionBegin;
2633   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2634   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2635   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2636   PetscFunctionReturn(PETSC_SUCCESS);
2637 }
2638 
2639 /*@
2640   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2641 
2642   Collective
2643 
2644   Input Parameters:
2645 + A  - the matrix
2646 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2647 
2648   Level: advanced
2649 
2650 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2651 @*/
2652 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2653 {
2654   PetscFunctionBegin;
2655   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2656   PetscFunctionReturn(PETSC_SUCCESS);
2657 }
2658 
2659 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2660 {
2661   PetscBool sc = PETSC_FALSE, flg;
2662 
2663   PetscFunctionBegin;
2664   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2665   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2666   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2667   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2668   PetscOptionsHeadEnd();
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2673 {
2674   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2675   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2676 
2677   PetscFunctionBegin;
2678   if (!Y->preallocated) {
2679     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2680   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2681     PetscInt nonew = aij->nonew;
2682     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2683     aij->nonew = nonew;
2684   }
2685   PetscCall(MatShift_Basic(Y, a));
2686   PetscFunctionReturn(PETSC_SUCCESS);
2687 }
2688 
2689 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2690 {
2691   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2692 
2693   PetscFunctionBegin;
2694   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2695   PetscCall(MatMissingDiagonal(a->A, missing, d));
2696   if (d) {
2697     PetscInt rstart;
2698     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2699     *d += rstart;
2700   }
2701   PetscFunctionReturn(PETSC_SUCCESS);
2702 }
2703 
2704 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2705 {
2706   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2707 
2708   PetscFunctionBegin;
2709   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
2713 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2714 {
2715   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2716 
2717   PetscFunctionBegin;
2718   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2720   PetscFunctionReturn(PETSC_SUCCESS);
2721 }
2722 
2723 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2724                                        MatGetRow_MPIAIJ,
2725                                        MatRestoreRow_MPIAIJ,
2726                                        MatMult_MPIAIJ,
2727                                        /* 4*/ MatMultAdd_MPIAIJ,
2728                                        MatMultTranspose_MPIAIJ,
2729                                        MatMultTransposeAdd_MPIAIJ,
2730                                        NULL,
2731                                        NULL,
2732                                        NULL,
2733                                        /*10*/ NULL,
2734                                        NULL,
2735                                        NULL,
2736                                        MatSOR_MPIAIJ,
2737                                        MatTranspose_MPIAIJ,
2738                                        /*15*/ MatGetInfo_MPIAIJ,
2739                                        MatEqual_MPIAIJ,
2740                                        MatGetDiagonal_MPIAIJ,
2741                                        MatDiagonalScale_MPIAIJ,
2742                                        MatNorm_MPIAIJ,
2743                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2744                                        MatAssemblyEnd_MPIAIJ,
2745                                        MatSetOption_MPIAIJ,
2746                                        MatZeroEntries_MPIAIJ,
2747                                        /*24*/ MatZeroRows_MPIAIJ,
2748                                        NULL,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        /*29*/ MatSetUp_MPI_Hash,
2753                                        NULL,
2754                                        NULL,
2755                                        MatGetDiagonalBlock_MPIAIJ,
2756                                        NULL,
2757                                        /*34*/ MatDuplicate_MPIAIJ,
2758                                        NULL,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        /*39*/ MatAXPY_MPIAIJ,
2763                                        MatCreateSubMatrices_MPIAIJ,
2764                                        MatIncreaseOverlap_MPIAIJ,
2765                                        MatGetValues_MPIAIJ,
2766                                        MatCopy_MPIAIJ,
2767                                        /*44*/ MatGetRowMax_MPIAIJ,
2768                                        MatScale_MPIAIJ,
2769                                        MatShift_MPIAIJ,
2770                                        MatDiagonalSet_MPIAIJ,
2771                                        MatZeroRowsColumns_MPIAIJ,
2772                                        /*49*/ MatSetRandom_MPIAIJ,
2773                                        MatGetRowIJ_MPIAIJ,
2774                                        MatRestoreRowIJ_MPIAIJ,
2775                                        NULL,
2776                                        NULL,
2777                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2778                                        NULL,
2779                                        MatSetUnfactored_MPIAIJ,
2780                                        MatPermute_MPIAIJ,
2781                                        NULL,
2782                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2783                                        MatDestroy_MPIAIJ,
2784                                        MatView_MPIAIJ,
2785                                        NULL,
2786                                        NULL,
2787                                        /*64*/ NULL,
2788                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2789                                        NULL,
2790                                        NULL,
2791                                        NULL,
2792                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2793                                        MatGetRowMinAbs_MPIAIJ,
2794                                        NULL,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                        /*75*/ MatFDColoringApply_AIJ,
2799                                        MatSetFromOptions_MPIAIJ,
2800                                        NULL,
2801                                        NULL,
2802                                        MatFindZeroDiagonals_MPIAIJ,
2803                                        /*80*/ NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        /*83*/ MatLoad_MPIAIJ,
2807                                        MatIsSymmetric_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        NULL,
2812                                        /*89*/ NULL,
2813                                        NULL,
2814                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2815                                        NULL,
2816                                        NULL,
2817                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2818                                        NULL,
2819                                        NULL,
2820                                        NULL,
2821                                        MatBindToCPU_MPIAIJ,
2822                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2823                                        NULL,
2824                                        NULL,
2825                                        MatConjugate_MPIAIJ,
2826                                        NULL,
2827                                        /*104*/ MatSetValuesRow_MPIAIJ,
2828                                        MatRealPart_MPIAIJ,
2829                                        MatImaginaryPart_MPIAIJ,
2830                                        NULL,
2831                                        NULL,
2832                                        /*109*/ NULL,
2833                                        NULL,
2834                                        MatGetRowMin_MPIAIJ,
2835                                        NULL,
2836                                        MatMissingDiagonal_MPIAIJ,
2837                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2838                                        NULL,
2839                                        MatGetGhosts_MPIAIJ,
2840                                        NULL,
2841                                        NULL,
2842                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2843                                        NULL,
2844                                        NULL,
2845                                        NULL,
2846                                        MatGetMultiProcBlock_MPIAIJ,
2847                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2848                                        MatGetColumnReductions_MPIAIJ,
2849                                        MatInvertBlockDiagonal_MPIAIJ,
2850                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2851                                        MatCreateSubMatricesMPI_MPIAIJ,
2852                                        /*129*/ NULL,
2853                                        NULL,
2854                                        NULL,
2855                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2856                                        NULL,
2857                                        /*134*/ NULL,
2858                                        NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        NULL,
2862                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2863                                        NULL,
2864                                        NULL,
2865                                        MatFDColoringSetUp_MPIXAIJ,
2866                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2867                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2868                                        /*145*/ NULL,
2869                                        NULL,
2870                                        NULL,
2871                                        MatCreateGraph_Simple_AIJ,
2872                                        NULL,
2873                                        /*150*/ NULL,
2874                                        MatEliminateZeros_MPIAIJ,
2875                                        MatGetRowSumAbs_MPIAIJ};
2876 
2877 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2878 {
2879   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2880 
2881   PetscFunctionBegin;
2882   PetscCall(MatStoreValues(aij->A));
2883   PetscCall(MatStoreValues(aij->B));
2884   PetscFunctionReturn(PETSC_SUCCESS);
2885 }
2886 
2887 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2888 {
2889   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2890 
2891   PetscFunctionBegin;
2892   PetscCall(MatRetrieveValues(aij->A));
2893   PetscCall(MatRetrieveValues(aij->B));
2894   PetscFunctionReturn(PETSC_SUCCESS);
2895 }
2896 
2897 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2898 {
2899   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2900   PetscMPIInt size;
2901 
2902   PetscFunctionBegin;
2903   if (B->hash_active) {
2904     B->ops[0]      = b->cops;
2905     B->hash_active = PETSC_FALSE;
2906   }
2907   PetscCall(PetscLayoutSetUp(B->rmap));
2908   PetscCall(PetscLayoutSetUp(B->cmap));
2909 
2910 #if defined(PETSC_USE_CTABLE)
2911   PetscCall(PetscHMapIDestroy(&b->colmap));
2912 #else
2913   PetscCall(PetscFree(b->colmap));
2914 #endif
2915   PetscCall(PetscFree(b->garray));
2916   PetscCall(VecDestroy(&b->lvec));
2917   PetscCall(VecScatterDestroy(&b->Mvctx));
2918 
2919   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2920 
2921   MatSeqXAIJGetOptions_Private(b->B);
2922   PetscCall(MatDestroy(&b->B));
2923   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2924   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2925   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2926   PetscCall(MatSetType(b->B, MATSEQAIJ));
2927   MatSeqXAIJRestoreOptions_Private(b->B);
2928 
2929   MatSeqXAIJGetOptions_Private(b->A);
2930   PetscCall(MatDestroy(&b->A));
2931   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2932   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2933   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2934   PetscCall(MatSetType(b->A, MATSEQAIJ));
2935   MatSeqXAIJRestoreOptions_Private(b->A);
2936 
2937   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2938   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2939   B->preallocated  = PETSC_TRUE;
2940   B->was_assembled = PETSC_FALSE;
2941   B->assembled     = PETSC_FALSE;
2942   PetscFunctionReturn(PETSC_SUCCESS);
2943 }
2944 
2945 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2946 {
2947   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2948 
2949   PetscFunctionBegin;
2950   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2951   PetscCall(PetscLayoutSetUp(B->rmap));
2952   PetscCall(PetscLayoutSetUp(B->cmap));
2953 
2954 #if defined(PETSC_USE_CTABLE)
2955   PetscCall(PetscHMapIDestroy(&b->colmap));
2956 #else
2957   PetscCall(PetscFree(b->colmap));
2958 #endif
2959   PetscCall(PetscFree(b->garray));
2960   PetscCall(VecDestroy(&b->lvec));
2961   PetscCall(VecScatterDestroy(&b->Mvctx));
2962 
2963   PetscCall(MatResetPreallocation(b->A));
2964   PetscCall(MatResetPreallocation(b->B));
2965   B->preallocated  = PETSC_TRUE;
2966   B->was_assembled = PETSC_FALSE;
2967   B->assembled     = PETSC_FALSE;
2968   PetscFunctionReturn(PETSC_SUCCESS);
2969 }
2970 
2971 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2972 {
2973   Mat         mat;
2974   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2975 
2976   PetscFunctionBegin;
2977   *newmat = NULL;
2978   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2979   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2980   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2981   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2982   a = (Mat_MPIAIJ *)mat->data;
2983 
2984   mat->factortype = matin->factortype;
2985   mat->assembled  = matin->assembled;
2986   mat->insertmode = NOT_SET_VALUES;
2987 
2988   a->size         = oldmat->size;
2989   a->rank         = oldmat->rank;
2990   a->donotstash   = oldmat->donotstash;
2991   a->roworiented  = oldmat->roworiented;
2992   a->rowindices   = NULL;
2993   a->rowvalues    = NULL;
2994   a->getrowactive = PETSC_FALSE;
2995 
2996   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2997   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2998   if (matin->hash_active) {
2999     PetscCall(MatSetUp(mat));
3000   } else {
3001     mat->preallocated = matin->preallocated;
3002     if (oldmat->colmap) {
3003 #if defined(PETSC_USE_CTABLE)
3004       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3005 #else
3006       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3007       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3008 #endif
3009     } else a->colmap = NULL;
3010     if (oldmat->garray) {
3011       PetscInt len;
3012       len = oldmat->B->cmap->n;
3013       PetscCall(PetscMalloc1(len + 1, &a->garray));
3014       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3015     } else a->garray = NULL;
3016 
3017     /* It may happen MatDuplicate is called with a non-assembled matrix
3018       In fact, MatDuplicate only requires the matrix to be preallocated
3019       This may happen inside a DMCreateMatrix_Shell */
3020     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3021     if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3022     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3023     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3024   }
3025   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3026   *newmat = mat;
3027   PetscFunctionReturn(PETSC_SUCCESS);
3028 }
3029 
3030 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3031 {
3032   PetscBool isbinary, ishdf5;
3033 
3034   PetscFunctionBegin;
3035   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3036   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3037   /* force binary viewer to load .info file if it has not yet done so */
3038   PetscCall(PetscViewerSetUp(viewer));
3039   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3040   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3041   if (isbinary) {
3042     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3043   } else if (ishdf5) {
3044 #if defined(PETSC_HAVE_HDF5)
3045     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3046 #else
3047     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3048 #endif
3049   } else {
3050     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3051   }
3052   PetscFunctionReturn(PETSC_SUCCESS);
3053 }
3054 
3055 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3056 {
3057   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3058   PetscInt    *rowidxs, *colidxs;
3059   PetscScalar *matvals;
3060 
3061   PetscFunctionBegin;
3062   PetscCall(PetscViewerSetUp(viewer));
3063 
3064   /* read in matrix header */
3065   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3066   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3067   M  = header[1];
3068   N  = header[2];
3069   nz = header[3];
3070   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3071   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3072   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3073 
3074   /* set block sizes from the viewer's .info file */
3075   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3076   /* set global sizes if not set already */
3077   if (mat->rmap->N < 0) mat->rmap->N = M;
3078   if (mat->cmap->N < 0) mat->cmap->N = N;
3079   PetscCall(PetscLayoutSetUp(mat->rmap));
3080   PetscCall(PetscLayoutSetUp(mat->cmap));
3081 
3082   /* check if the matrix sizes are correct */
3083   PetscCall(MatGetSize(mat, &rows, &cols));
3084   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3085 
3086   /* read in row lengths and build row indices */
3087   PetscCall(MatGetLocalSize(mat, &m, NULL));
3088   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3089   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3090   rowidxs[0] = 0;
3091   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3092   if (nz != PETSC_MAX_INT) {
3093     PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3094     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3095   }
3096 
3097   /* read in column indices and matrix values */
3098   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3099   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3100   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3101   /* store matrix indices and values */
3102   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3103   PetscCall(PetscFree(rowidxs));
3104   PetscCall(PetscFree2(colidxs, matvals));
3105   PetscFunctionReturn(PETSC_SUCCESS);
3106 }
3107 
3108 /* Not scalable because of ISAllGather() unless getting all columns. */
3109 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3110 {
3111   IS          iscol_local;
3112   PetscBool   isstride;
3113   PetscMPIInt lisstride = 0, gisstride;
3114 
3115   PetscFunctionBegin;
3116   /* check if we are grabbing all columns*/
3117   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3118 
3119   if (isstride) {
3120     PetscInt start, len, mstart, mlen;
3121     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3122     PetscCall(ISGetLocalSize(iscol, &len));
3123     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3124     if (mstart == start && mlen - mstart == len) lisstride = 1;
3125   }
3126 
3127   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3128   if (gisstride) {
3129     PetscInt N;
3130     PetscCall(MatGetSize(mat, NULL, &N));
3131     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3132     PetscCall(ISSetIdentity(iscol_local));
3133     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3134   } else {
3135     PetscInt cbs;
3136     PetscCall(ISGetBlockSize(iscol, &cbs));
3137     PetscCall(ISAllGather(iscol, &iscol_local));
3138     PetscCall(ISSetBlockSize(iscol_local, cbs));
3139   }
3140 
3141   *isseq = iscol_local;
3142   PetscFunctionReturn(PETSC_SUCCESS);
3143 }
3144 
3145 /*
3146  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3147  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3148 
3149  Input Parameters:
3150 +   mat - matrix
3151 .   isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
3152            i.e., mat->rstart <= isrow[i] < mat->rend
3153 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3154            i.e., mat->cstart <= iscol[i] < mat->cend
3155 
3156  Output Parameters:
3157 +   isrow_d - sequential row index set for retrieving mat->A
3158 .   iscol_d - sequential  column index set for retrieving mat->A
3159 .   iscol_o - sequential column index set for retrieving mat->B
3160 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3161  */
3162 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3163 {
3164   Vec             x, cmap;
3165   const PetscInt *is_idx;
3166   PetscScalar    *xarray, *cmaparray;
3167   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3168   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3169   Mat             B    = a->B;
3170   Vec             lvec = a->lvec, lcmap;
3171   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3172   MPI_Comm        comm;
3173   VecScatter      Mvctx = a->Mvctx;
3174 
3175   PetscFunctionBegin;
3176   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3177   PetscCall(ISGetLocalSize(iscol, &ncols));
3178 
3179   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3180   PetscCall(MatCreateVecs(mat, &x, NULL));
3181   PetscCall(VecSet(x, -1.0));
3182   PetscCall(VecDuplicate(x, &cmap));
3183   PetscCall(VecSet(cmap, -1.0));
3184 
3185   /* Get start indices */
3186   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3187   isstart -= ncols;
3188   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3189 
3190   PetscCall(ISGetIndices(iscol, &is_idx));
3191   PetscCall(VecGetArray(x, &xarray));
3192   PetscCall(VecGetArray(cmap, &cmaparray));
3193   PetscCall(PetscMalloc1(ncols, &idx));
3194   for (i = 0; i < ncols; i++) {
3195     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3196     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3197     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3198   }
3199   PetscCall(VecRestoreArray(x, &xarray));
3200   PetscCall(VecRestoreArray(cmap, &cmaparray));
3201   PetscCall(ISRestoreIndices(iscol, &is_idx));
3202 
3203   /* Get iscol_d */
3204   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3205   PetscCall(ISGetBlockSize(iscol, &i));
3206   PetscCall(ISSetBlockSize(*iscol_d, i));
3207 
3208   /* Get isrow_d */
3209   PetscCall(ISGetLocalSize(isrow, &m));
3210   rstart = mat->rmap->rstart;
3211   PetscCall(PetscMalloc1(m, &idx));
3212   PetscCall(ISGetIndices(isrow, &is_idx));
3213   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3214   PetscCall(ISRestoreIndices(isrow, &is_idx));
3215 
3216   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3217   PetscCall(ISGetBlockSize(isrow, &i));
3218   PetscCall(ISSetBlockSize(*isrow_d, i));
3219 
3220   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3221   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3222   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3223 
3224   PetscCall(VecDuplicate(lvec, &lcmap));
3225 
3226   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3227   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3228 
3229   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3230   /* off-process column indices */
3231   count = 0;
3232   PetscCall(PetscMalloc1(Bn, &idx));
3233   PetscCall(PetscMalloc1(Bn, &cmap1));
3234 
3235   PetscCall(VecGetArray(lvec, &xarray));
3236   PetscCall(VecGetArray(lcmap, &cmaparray));
3237   for (i = 0; i < Bn; i++) {
3238     if (PetscRealPart(xarray[i]) > -1.0) {
3239       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3240       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3241       count++;
3242     }
3243   }
3244   PetscCall(VecRestoreArray(lvec, &xarray));
3245   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3246 
3247   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3248   /* cannot ensure iscol_o has same blocksize as iscol! */
3249 
3250   PetscCall(PetscFree(idx));
3251   *garray = cmap1;
3252 
3253   PetscCall(VecDestroy(&x));
3254   PetscCall(VecDestroy(&cmap));
3255   PetscCall(VecDestroy(&lcmap));
3256   PetscFunctionReturn(PETSC_SUCCESS);
3257 }
3258 
3259 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3260 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3261 {
3262   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3263   Mat         M = NULL;
3264   MPI_Comm    comm;
3265   IS          iscol_d, isrow_d, iscol_o;
3266   Mat         Asub = NULL, Bsub = NULL;
3267   PetscInt    n;
3268 
3269   PetscFunctionBegin;
3270   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3271 
3272   if (call == MAT_REUSE_MATRIX) {
3273     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3274     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3275     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3276 
3277     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3278     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3279 
3280     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3281     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3282 
3283     /* Update diagonal and off-diagonal portions of submat */
3284     asub = (Mat_MPIAIJ *)(*submat)->data;
3285     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3286     PetscCall(ISGetLocalSize(iscol_o, &n));
3287     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3288     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3289     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3290 
3291   } else { /* call == MAT_INITIAL_MATRIX) */
3292     const PetscInt *garray;
3293     PetscInt        BsubN;
3294 
3295     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3296     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3297 
3298     /* Create local submatrices Asub and Bsub */
3299     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3300     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3301 
3302     /* Create submatrix M */
3303     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3304 
3305     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3306     asub = (Mat_MPIAIJ *)M->data;
3307 
3308     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3309     n = asub->B->cmap->N;
3310     if (BsubN > n) {
3311       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3312       const PetscInt *idx;
3313       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3314       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3315 
3316       PetscCall(PetscMalloc1(n, &idx_new));
3317       j = 0;
3318       PetscCall(ISGetIndices(iscol_o, &idx));
3319       for (i = 0; i < n; i++) {
3320         if (j >= BsubN) break;
3321         while (subgarray[i] > garray[j]) j++;
3322 
3323         if (subgarray[i] == garray[j]) {
3324           idx_new[i] = idx[j++];
3325         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3326       }
3327       PetscCall(ISRestoreIndices(iscol_o, &idx));
3328 
3329       PetscCall(ISDestroy(&iscol_o));
3330       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3331 
3332     } else if (BsubN < n) {
3333       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3334     }
3335 
3336     PetscCall(PetscFree(garray));
3337     *submat = M;
3338 
3339     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3340     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3341     PetscCall(ISDestroy(&isrow_d));
3342 
3343     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3344     PetscCall(ISDestroy(&iscol_d));
3345 
3346     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3347     PetscCall(ISDestroy(&iscol_o));
3348   }
3349   PetscFunctionReturn(PETSC_SUCCESS);
3350 }
3351 
3352 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3353 {
3354   IS        iscol_local = NULL, isrow_d;
3355   PetscInt  csize;
3356   PetscInt  n, i, j, start, end;
3357   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3358   MPI_Comm  comm;
3359 
3360   PetscFunctionBegin;
3361   /* If isrow has same processor distribution as mat,
3362      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3363   if (call == MAT_REUSE_MATRIX) {
3364     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3365     if (isrow_d) {
3366       sameRowDist  = PETSC_TRUE;
3367       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3368     } else {
3369       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3370       if (iscol_local) {
3371         sameRowDist  = PETSC_TRUE;
3372         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3373       }
3374     }
3375   } else {
3376     /* Check if isrow has same processor distribution as mat */
3377     sameDist[0] = PETSC_FALSE;
3378     PetscCall(ISGetLocalSize(isrow, &n));
3379     if (!n) {
3380       sameDist[0] = PETSC_TRUE;
3381     } else {
3382       PetscCall(ISGetMinMax(isrow, &i, &j));
3383       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3384       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3385     }
3386 
3387     /* Check if iscol has same processor distribution as mat */
3388     sameDist[1] = PETSC_FALSE;
3389     PetscCall(ISGetLocalSize(iscol, &n));
3390     if (!n) {
3391       sameDist[1] = PETSC_TRUE;
3392     } else {
3393       PetscCall(ISGetMinMax(iscol, &i, &j));
3394       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3395       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3396     }
3397 
3398     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3399     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3400     sameRowDist = tsameDist[0];
3401   }
3402 
3403   if (sameRowDist) {
3404     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3405       /* isrow and iscol have same processor distribution as mat */
3406       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3407       PetscFunctionReturn(PETSC_SUCCESS);
3408     } else { /* sameRowDist */
3409       /* isrow has same processor distribution as mat */
3410       if (call == MAT_INITIAL_MATRIX) {
3411         PetscBool sorted;
3412         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3413         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3414         PetscCall(ISGetSize(iscol, &i));
3415         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3416 
3417         PetscCall(ISSorted(iscol_local, &sorted));
3418         if (sorted) {
3419           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3420           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3421           PetscFunctionReturn(PETSC_SUCCESS);
3422         }
3423       } else { /* call == MAT_REUSE_MATRIX */
3424         IS iscol_sub;
3425         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3426         if (iscol_sub) {
3427           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3428           PetscFunctionReturn(PETSC_SUCCESS);
3429         }
3430       }
3431     }
3432   }
3433 
3434   /* General case: iscol -> iscol_local which has global size of iscol */
3435   if (call == MAT_REUSE_MATRIX) {
3436     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3437     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3438   } else {
3439     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3440   }
3441 
3442   PetscCall(ISGetLocalSize(iscol, &csize));
3443   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3444 
3445   if (call == MAT_INITIAL_MATRIX) {
3446     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3447     PetscCall(ISDestroy(&iscol_local));
3448   }
3449   PetscFunctionReturn(PETSC_SUCCESS);
3450 }
3451 
3452 /*@C
3453   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3454   and "off-diagonal" part of the matrix in CSR format.
3455 
3456   Collective
3457 
3458   Input Parameters:
3459 + comm   - MPI communicator
3460 . A      - "diagonal" portion of matrix
3461 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3462 - garray - global index of `B` columns
3463 
3464   Output Parameter:
3465 . mat - the matrix, with input `A` as its local diagonal matrix
3466 
3467   Level: advanced
3468 
3469   Notes:
3470   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3471 
3472   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3473 
3474 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3475 @*/
3476 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3477 {
3478   Mat_MPIAIJ        *maij;
3479   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3480   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3481   const PetscScalar *oa;
3482   Mat                Bnew;
3483   PetscInt           m, n, N;
3484   MatType            mpi_mat_type;
3485 
3486   PetscFunctionBegin;
3487   PetscCall(MatCreate(comm, mat));
3488   PetscCall(MatGetSize(A, &m, &n));
3489   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3490   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3491   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3492   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3493 
3494   /* Get global columns of mat */
3495   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3496 
3497   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3498   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3499   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3500   PetscCall(MatSetType(*mat, mpi_mat_type));
3501 
3502   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3503   maij = (Mat_MPIAIJ *)(*mat)->data;
3504 
3505   (*mat)->preallocated = PETSC_TRUE;
3506 
3507   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3508   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3509 
3510   /* Set A as diagonal portion of *mat */
3511   maij->A = A;
3512 
3513   nz = oi[m];
3514   for (i = 0; i < nz; i++) {
3515     col   = oj[i];
3516     oj[i] = garray[col];
3517   }
3518 
3519   /* Set Bnew as off-diagonal portion of *mat */
3520   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3521   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3522   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3523   bnew        = (Mat_SeqAIJ *)Bnew->data;
3524   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3525   maij->B     = Bnew;
3526 
3527   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3528 
3529   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3530   b->free_a       = PETSC_FALSE;
3531   b->free_ij      = PETSC_FALSE;
3532   PetscCall(MatDestroy(&B));
3533 
3534   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3535   bnew->free_a       = PETSC_TRUE;
3536   bnew->free_ij      = PETSC_TRUE;
3537 
3538   /* condense columns of maij->B */
3539   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3540   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3541   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3542   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3543   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3544   PetscFunctionReturn(PETSC_SUCCESS);
3545 }
3546 
3547 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3548 
3549 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3550 {
3551   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3552   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3553   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3554   Mat             M, Msub, B = a->B;
3555   MatScalar      *aa;
3556   Mat_SeqAIJ     *aij;
3557   PetscInt       *garray = a->garray, *colsub, Ncols;
3558   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3559   IS              iscol_sub, iscmap;
3560   const PetscInt *is_idx, *cmap;
3561   PetscBool       allcolumns = PETSC_FALSE;
3562   MPI_Comm        comm;
3563 
3564   PetscFunctionBegin;
3565   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3566   if (call == MAT_REUSE_MATRIX) {
3567     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3568     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3569     PetscCall(ISGetLocalSize(iscol_sub, &count));
3570 
3571     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3572     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3573 
3574     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3575     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3576 
3577     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3578 
3579   } else { /* call == MAT_INITIAL_MATRIX) */
3580     PetscBool flg;
3581 
3582     PetscCall(ISGetLocalSize(iscol, &n));
3583     PetscCall(ISGetSize(iscol, &Ncols));
3584 
3585     /* (1) iscol -> nonscalable iscol_local */
3586     /* Check for special case: each processor gets entire matrix columns */
3587     PetscCall(ISIdentity(iscol_local, &flg));
3588     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3589     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3590     if (allcolumns) {
3591       iscol_sub = iscol_local;
3592       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3593       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3594 
3595     } else {
3596       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3597       PetscInt *idx, *cmap1, k;
3598       PetscCall(PetscMalloc1(Ncols, &idx));
3599       PetscCall(PetscMalloc1(Ncols, &cmap1));
3600       PetscCall(ISGetIndices(iscol_local, &is_idx));
3601       count = 0;
3602       k     = 0;
3603       for (i = 0; i < Ncols; i++) {
3604         j = is_idx[i];
3605         if (j >= cstart && j < cend) {
3606           /* diagonal part of mat */
3607           idx[count]     = j;
3608           cmap1[count++] = i; /* column index in submat */
3609         } else if (Bn) {
3610           /* off-diagonal part of mat */
3611           if (j == garray[k]) {
3612             idx[count]     = j;
3613             cmap1[count++] = i; /* column index in submat */
3614           } else if (j > garray[k]) {
3615             while (j > garray[k] && k < Bn - 1) k++;
3616             if (j == garray[k]) {
3617               idx[count]     = j;
3618               cmap1[count++] = i; /* column index in submat */
3619             }
3620           }
3621         }
3622       }
3623       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3624 
3625       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3626       PetscCall(ISGetBlockSize(iscol, &cbs));
3627       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3628 
3629       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3630     }
3631 
3632     /* (3) Create sequential Msub */
3633     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3634   }
3635 
3636   PetscCall(ISGetLocalSize(iscol_sub, &count));
3637   aij = (Mat_SeqAIJ *)(Msub)->data;
3638   ii  = aij->i;
3639   PetscCall(ISGetIndices(iscmap, &cmap));
3640 
3641   /*
3642       m - number of local rows
3643       Ncols - number of columns (same on all processors)
3644       rstart - first row in new global matrix generated
3645   */
3646   PetscCall(MatGetSize(Msub, &m, NULL));
3647 
3648   if (call == MAT_INITIAL_MATRIX) {
3649     /* (4) Create parallel newmat */
3650     PetscMPIInt rank, size;
3651     PetscInt    csize;
3652 
3653     PetscCallMPI(MPI_Comm_size(comm, &size));
3654     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3655 
3656     /*
3657         Determine the number of non-zeros in the diagonal and off-diagonal
3658         portions of the matrix in order to do correct preallocation
3659     */
3660 
3661     /* first get start and end of "diagonal" columns */
3662     PetscCall(ISGetLocalSize(iscol, &csize));
3663     if (csize == PETSC_DECIDE) {
3664       PetscCall(ISGetSize(isrow, &mglobal));
3665       if (mglobal == Ncols) { /* square matrix */
3666         nlocal = m;
3667       } else {
3668         nlocal = Ncols / size + ((Ncols % size) > rank);
3669       }
3670     } else {
3671       nlocal = csize;
3672     }
3673     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3674     rstart = rend - nlocal;
3675     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3676 
3677     /* next, compute all the lengths */
3678     jj = aij->j;
3679     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3680     olens = dlens + m;
3681     for (i = 0; i < m; i++) {
3682       jend = ii[i + 1] - ii[i];
3683       olen = 0;
3684       dlen = 0;
3685       for (j = 0; j < jend; j++) {
3686         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3687         else dlen++;
3688         jj++;
3689       }
3690       olens[i] = olen;
3691       dlens[i] = dlen;
3692     }
3693 
3694     PetscCall(ISGetBlockSize(isrow, &bs));
3695     PetscCall(ISGetBlockSize(iscol, &cbs));
3696 
3697     PetscCall(MatCreate(comm, &M));
3698     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3699     PetscCall(MatSetBlockSizes(M, bs, cbs));
3700     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3701     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3702     PetscCall(PetscFree(dlens));
3703 
3704   } else { /* call == MAT_REUSE_MATRIX */
3705     M = *newmat;
3706     PetscCall(MatGetLocalSize(M, &i, NULL));
3707     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3708     PetscCall(MatZeroEntries(M));
3709     /*
3710          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3711        rather than the slower MatSetValues().
3712     */
3713     M->was_assembled = PETSC_TRUE;
3714     M->assembled     = PETSC_FALSE;
3715   }
3716 
3717   /* (5) Set values of Msub to *newmat */
3718   PetscCall(PetscMalloc1(count, &colsub));
3719   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3720 
3721   jj = aij->j;
3722   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3723   for (i = 0; i < m; i++) {
3724     row = rstart + i;
3725     nz  = ii[i + 1] - ii[i];
3726     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3727     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3728     jj += nz;
3729     aa += nz;
3730   }
3731   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3732   PetscCall(ISRestoreIndices(iscmap, &cmap));
3733 
3734   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3735   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3736 
3737   PetscCall(PetscFree(colsub));
3738 
3739   /* save Msub, iscol_sub and iscmap used in processor for next request */
3740   if (call == MAT_INITIAL_MATRIX) {
3741     *newmat = M;
3742     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3743     PetscCall(MatDestroy(&Msub));
3744 
3745     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3746     PetscCall(ISDestroy(&iscol_sub));
3747 
3748     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3749     PetscCall(ISDestroy(&iscmap));
3750 
3751     if (iscol_local) {
3752       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3753       PetscCall(ISDestroy(&iscol_local));
3754     }
3755   }
3756   PetscFunctionReturn(PETSC_SUCCESS);
3757 }
3758 
3759 /*
3760     Not great since it makes two copies of the submatrix, first an SeqAIJ
3761   in local and then by concatenating the local matrices the end result.
3762   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3763 
3764   This requires a sequential iscol with all indices.
3765 */
3766 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3767 {
3768   PetscMPIInt rank, size;
3769   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3770   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3771   Mat         M, Mreuse;
3772   MatScalar  *aa, *vwork;
3773   MPI_Comm    comm;
3774   Mat_SeqAIJ *aij;
3775   PetscBool   colflag, allcolumns = PETSC_FALSE;
3776 
3777   PetscFunctionBegin;
3778   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3779   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3780   PetscCallMPI(MPI_Comm_size(comm, &size));
3781 
3782   /* Check for special case: each processor gets entire matrix columns */
3783   PetscCall(ISIdentity(iscol, &colflag));
3784   PetscCall(ISGetLocalSize(iscol, &n));
3785   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3786   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3787 
3788   if (call == MAT_REUSE_MATRIX) {
3789     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3790     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3791     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3792   } else {
3793     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3794   }
3795 
3796   /*
3797       m - number of local rows
3798       n - number of columns (same on all processors)
3799       rstart - first row in new global matrix generated
3800   */
3801   PetscCall(MatGetSize(Mreuse, &m, &n));
3802   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3803   if (call == MAT_INITIAL_MATRIX) {
3804     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3805     ii  = aij->i;
3806     jj  = aij->j;
3807 
3808     /*
3809         Determine the number of non-zeros in the diagonal and off-diagonal
3810         portions of the matrix in order to do correct preallocation
3811     */
3812 
3813     /* first get start and end of "diagonal" columns */
3814     if (csize == PETSC_DECIDE) {
3815       PetscCall(ISGetSize(isrow, &mglobal));
3816       if (mglobal == n) { /* square matrix */
3817         nlocal = m;
3818       } else {
3819         nlocal = n / size + ((n % size) > rank);
3820       }
3821     } else {
3822       nlocal = csize;
3823     }
3824     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3825     rstart = rend - nlocal;
3826     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3827 
3828     /* next, compute all the lengths */
3829     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3830     olens = dlens + m;
3831     for (i = 0; i < m; i++) {
3832       jend = ii[i + 1] - ii[i];
3833       olen = 0;
3834       dlen = 0;
3835       for (j = 0; j < jend; j++) {
3836         if (*jj < rstart || *jj >= rend) olen++;
3837         else dlen++;
3838         jj++;
3839       }
3840       olens[i] = olen;
3841       dlens[i] = dlen;
3842     }
3843     PetscCall(MatCreate(comm, &M));
3844     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3845     PetscCall(MatSetBlockSizes(M, bs, cbs));
3846     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3847     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3848     PetscCall(PetscFree(dlens));
3849   } else {
3850     PetscInt ml, nl;
3851 
3852     M = *newmat;
3853     PetscCall(MatGetLocalSize(M, &ml, &nl));
3854     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3855     PetscCall(MatZeroEntries(M));
3856     /*
3857          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3858        rather than the slower MatSetValues().
3859     */
3860     M->was_assembled = PETSC_TRUE;
3861     M->assembled     = PETSC_FALSE;
3862   }
3863   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3864   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3865   ii  = aij->i;
3866   jj  = aij->j;
3867 
3868   /* trigger copy to CPU if needed */
3869   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3870   for (i = 0; i < m; i++) {
3871     row   = rstart + i;
3872     nz    = ii[i + 1] - ii[i];
3873     cwork = jj;
3874     jj    = PetscSafePointerPlusOffset(jj, nz);
3875     vwork = aa;
3876     aa    = PetscSafePointerPlusOffset(aa, nz);
3877     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3878   }
3879   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3880 
3881   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3882   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3883   *newmat = M;
3884 
3885   /* save submatrix used in processor for next request */
3886   if (call == MAT_INITIAL_MATRIX) {
3887     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3888     PetscCall(MatDestroy(&Mreuse));
3889   }
3890   PetscFunctionReturn(PETSC_SUCCESS);
3891 }
3892 
3893 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3894 {
3895   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3896   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3897   const PetscInt *JJ;
3898   PetscBool       nooffprocentries;
3899   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3900 
3901   PetscFunctionBegin;
3902   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3903 
3904   PetscCall(PetscLayoutSetUp(B->rmap));
3905   PetscCall(PetscLayoutSetUp(B->cmap));
3906   m      = B->rmap->n;
3907   cstart = B->cmap->rstart;
3908   cend   = B->cmap->rend;
3909   rstart = B->rmap->rstart;
3910 
3911   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3912 
3913   if (PetscDefined(USE_DEBUG)) {
3914     for (i = 0; i < m; i++) {
3915       nnz = Ii[i + 1] - Ii[i];
3916       JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
3917       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3918       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3919       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3920     }
3921   }
3922 
3923   for (i = 0; i < m; i++) {
3924     nnz     = Ii[i + 1] - Ii[i];
3925     JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
3926     nnz_max = PetscMax(nnz_max, nnz);
3927     d       = 0;
3928     for (j = 0; j < nnz; j++) {
3929       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3930     }
3931     d_nnz[i] = d;
3932     o_nnz[i] = nnz - d;
3933   }
3934   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3935   PetscCall(PetscFree2(d_nnz, o_nnz));
3936 
3937   for (i = 0; i < m; i++) {
3938     ii = i + rstart;
3939     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
3940   }
3941   nooffprocentries    = B->nooffprocentries;
3942   B->nooffprocentries = PETSC_TRUE;
3943   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3944   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3945   B->nooffprocentries = nooffprocentries;
3946 
3947   /* count number of entries below block diagonal */
3948   PetscCall(PetscFree(Aij->ld));
3949   PetscCall(PetscCalloc1(m, &ld));
3950   Aij->ld = ld;
3951   for (i = 0; i < m; i++) {
3952     nnz = Ii[i + 1] - Ii[i];
3953     j   = 0;
3954     while (j < nnz && J[j] < cstart) j++;
3955     ld[i] = j;
3956     if (J) J += nnz;
3957   }
3958 
3959   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3960   PetscFunctionReturn(PETSC_SUCCESS);
3961 }
3962 
3963 /*@
3964   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3965   (the default parallel PETSc format).
3966 
3967   Collective
3968 
3969   Input Parameters:
3970 + B - the matrix
3971 . i - the indices into `j` for the start of each local row (indices start with zero)
3972 . j - the column indices for each local row (indices start with zero)
3973 - v - optional values in the matrix
3974 
3975   Level: developer
3976 
3977   Notes:
3978   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3979   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3980   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3981 
3982   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3983 
3984   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3985 
3986   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3987 
3988   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3989   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3990 
3991   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
3993   as shown
3994 .vb
3995         1 0 0
3996         2 0 3     P0
3997        -------
3998         4 5 6     P1
3999 
4000      Process0 [P0] rows_owned=[0,1]
4001         i =  {0,1,3}  [size = nrow+1  = 2+1]
4002         j =  {0,0,2}  [size = 3]
4003         v =  {1,2,3}  [size = 3]
4004 
4005      Process1 [P1] rows_owned=[2]
4006         i =  {0,3}    [size = nrow+1  = 1+1]
4007         j =  {0,1,2}  [size = 3]
4008         v =  {4,5,6}  [size = 3]
4009 .ve
4010 
4011 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4012           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4013 @*/
4014 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4015 {
4016   PetscFunctionBegin;
4017   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4018   PetscFunctionReturn(PETSC_SUCCESS);
4019 }
4020 
4021 /*@C
4022   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4023   (the default parallel PETSc format).  For good matrix assembly performance
4024   the user should preallocate the matrix storage by setting the parameters
4025   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4026 
4027   Collective
4028 
4029   Input Parameters:
4030 + B     - the matrix
4031 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4032            (same value is used for all local rows)
4033 . d_nnz - array containing the number of nonzeros in the various rows of the
4034            DIAGONAL portion of the local submatrix (possibly different for each row)
4035            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4036            The size of this array is equal to the number of local rows, i.e 'm'.
4037            For matrices that will be factored, you must leave room for (and set)
4038            the diagonal entry even if it is zero.
4039 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4040            submatrix (same value is used for all local rows).
4041 - o_nnz - array containing the number of nonzeros in the various rows of the
4042            OFF-DIAGONAL portion of the local submatrix (possibly different for
4043            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4044            structure. The size of this array is equal to the number
4045            of local rows, i.e 'm'.
4046 
4047   Example Usage:
4048   Consider the following 8x8 matrix with 34 non-zero values, that is
4049   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4050   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4051   as follows
4052 
4053 .vb
4054             1  2  0  |  0  3  0  |  0  4
4055     Proc0   0  5  6  |  7  0  0  |  8  0
4056             9  0 10  | 11  0  0  | 12  0
4057     -------------------------------------
4058            13  0 14  | 15 16 17  |  0  0
4059     Proc1   0 18  0  | 19 20 21  |  0  0
4060             0  0  0  | 22 23  0  | 24  0
4061     -------------------------------------
4062     Proc2  25 26 27  |  0  0 28  | 29  0
4063            30  0  0  | 31 32 33  |  0 34
4064 .ve
4065 
4066   This can be represented as a collection of submatrices as
4067 .vb
4068       A B C
4069       D E F
4070       G H I
4071 .ve
4072 
4073   Where the submatrices A,B,C are owned by proc0, D,E,F are
4074   owned by proc1, G,H,I are owned by proc2.
4075 
4076   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4077   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4078   The 'M','N' parameters are 8,8, and have the same values on all procs.
4079 
4080   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4081   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4082   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4083   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4084   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4086 
4087   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4088   allocated for every row of the local diagonal submatrix, and `o_nz`
4089   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4091   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4092   In this case, the values of `d_nz`, `o_nz` are
4093 .vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
4097 .ve
4098   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4099   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
4101   34 values.
4102 
4103   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4104   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4105   In the above case the values for `d_nnz`, `o_nnz` are
4106 .vb
4107      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4108      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4109      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4110 .ve
4111   Here the space allocated is sum of all the above values i.e 34, and
4112   hence pre-allocation is perfect.
4113 
4114   Level: intermediate
4115 
4116   Notes:
4117   If the *_nnz parameter is given then the *_nz parameter is ignored
4118 
4119   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4120   storage.  The stored row and column indices begin with zero.
4121   See [Sparse Matrices](sec_matsparse) for details.
4122 
4123   The parallel matrix is partitioned such that the first m0 rows belong to
4124   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4125   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4126 
4127   The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
4132   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4133   common case of a square matrix, the row and column ranges are the same and
4134   the DIAGONAL part is also square. The remaining portion of the local
4135   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4136 
4137   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4138 
4139   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4140   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4141   You can also run with the option `-info` and look for messages with the string
4142   malloc in them to see if additional memory allocation was needed.
4143 
4144 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4145           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4146 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* argument 1 must be a Mat object whose type has been set */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* forward all arguments to the implementation composed with B under "MatMPIAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4155 
4156 /*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in
  standard CSR format.
4159 
4160   Collective
4161 
4162   Input Parameters:
4163 + comm - MPI communicator
4164 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4165 . n    - This value should be the same as the local size used in creating the
4166          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4167          calculated if `N` is given) For square matrices n is almost always `m`.
4168 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4169 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4170 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4171 . j    - global column indices
4172 - a    - optional matrix values
4173 
4174   Output Parameter:
4175 . mat - the matrix
4176 
4177   Level: intermediate
4178 
4179   Notes:
4180   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4181   thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4183 
4184   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4185 
4186   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4187 
4188   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4189   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4190 
4191   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
4193   as shown
4194 .vb
4195         1 0 0
4196         2 0 3     P0
4197        -------
4198         4 5 6     P1
4199 
4200      Process0 [P0] rows_owned=[0,1]
4201         i =  {0,1,3}  [size = nrow+1  = 2+1]
4202         j =  {0,0,2}  [size = 3]
4203         v =  {1,2,3}  [size = 3]
4204 
4205      Process1 [P1] rows_owned=[2]
4206         i =  {0,3}    [size = nrow+1  = 1+1]
4207         j =  {0,1,2}  [size = 3]
4208         v =  {4,5,6}  [size = 3]
4209 .ve
4210 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4213 @*/
4214 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4215 {
4216   PetscFunctionBegin;
4217   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4218   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4219   PetscCall(MatCreate(comm, mat));
4220   PetscCall(MatSetSizes(*mat, m, n, M, N));
4221   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4222   PetscCall(MatSetType(*mat, MATMPIAIJ));
4223   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4224   PetscFunctionReturn(PETSC_SUCCESS);
4225 }
4226 
4227 /*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
  CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
  to `MatCreateMPIAIJWithArrays()`
4231 
4232   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4233 
4234   Collective
4235 
4236   Input Parameters:
4237 + mat - the matrix
4238 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4239 . n   - This value should be the same as the local size used in creating the
4240        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4241        calculated if N is given) For square matrices n is almost always m.
4242 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4243 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4244 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4245 . J   - column indices
4246 - v   - matrix values
4247 
4248   Level: deprecated
4249 
4250 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4251           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4252 @*/
4253 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4254 {
4255   PetscInt        nnz, i;
4256   PetscBool       nooffprocentries;
4257   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4258   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4259   PetscScalar    *ad, *ao;
4260   PetscInt        ldi, Iii, md;
4261   const PetscInt *Adi = Ad->i;
4262   PetscInt       *ld  = Aij->ld;
4263 
4264   PetscFunctionBegin;
4265   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4266   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4267   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4268   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4269 
4270   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4271   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4272 
4273   for (i = 0; i < m; i++) {
4274     if (PetscDefined(USE_DEBUG)) {
4275       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4276         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4277         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4278       }
4279     }
4280     nnz = Ii[i + 1] - Ii[i];
4281     Iii = Ii[i];
4282     ldi = ld[i];
4283     md  = Adi[i + 1] - Adi[i];
4284     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4285     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4286     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4287     ad += md;
4288     ao += nnz - md;
4289   }
4290   nooffprocentries      = mat->nooffprocentries;
4291   mat->nooffprocentries = PETSC_TRUE;
4292   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4293   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4294   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4295   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4296   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4297   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4298   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4299   mat->nooffprocentries = nooffprocentries;
4300   PetscFunctionReturn(PETSC_SUCCESS);
4301 }
4302 
4303 /*@
4304   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4305 
4306   Collective
4307 
4308   Input Parameters:
4309 + mat - the matrix
4310 - v   - matrix values, stored by row
4311 
4312   Level: intermediate
4313 
4314   Notes:
4315   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4316 
4317   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4318 
4319 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4320           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4321 @*/
4322 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4323 {
4324   PetscInt        nnz, i, m;
4325   PetscBool       nooffprocentries;
4326   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4327   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4328   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4329   PetscScalar    *ad, *ao;
4330   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4331   PetscInt        ldi, Iii, md;
4332   PetscInt       *ld = Aij->ld;
4333 
4334   PetscFunctionBegin;
4335   m = mat->rmap->n;
4336 
4337   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4338   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4339   Iii = 0;
4340   for (i = 0; i < m; i++) {
4341     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4342     ldi = ld[i];
4343     md  = Adi[i + 1] - Adi[i];
4344     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4345     ad += md;
4346     if (ao) {
4347       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4348       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4349       ao += nnz - md;
4350     }
4351     Iii += nnz;
4352   }
4353   nooffprocentries      = mat->nooffprocentries;
4354   mat->nooffprocentries = PETSC_TRUE;
4355   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4356   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4357   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4358   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4359   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4360   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4361   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4362   mat->nooffprocentries = nooffprocentries;
4363   PetscFunctionReturn(PETSC_SUCCESS);
4364 }
4365 
4366 /*@C
4367   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4368   (the default parallel PETSc format).  For good matrix assembly performance
4369   the user should preallocate the matrix storage by setting the parameters
4370   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4371 
4372   Collective
4373 
4374   Input Parameters:
4375 + comm  - MPI communicator
4376 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4377            This value should be the same as the local size used in creating the
4378            y vector for the matrix-vector product y = Ax.
4379 . n     - This value should be the same as the local size used in creating the
4380        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4381        calculated if N is given) For square matrices n is almost always m.
4382 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4383 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4384 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4385            (same value is used for all local rows)
4386 . d_nnz - array containing the number of nonzeros in the various rows of the
4387            DIAGONAL portion of the local submatrix (possibly different for each row)
4388            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4389            The size of this array is equal to the number of local rows, i.e 'm'.
4390 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4391            submatrix (same value is used for all local rows).
4392 - o_nnz - array containing the number of nonzeros in the various rows of the
4393            OFF-DIAGONAL portion of the local submatrix (possibly different for
4394            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4395            structure. The size of this array is equal to the number
4396            of local rows, i.e 'm'.
4397 
4398   Output Parameter:
4399 . A - the matrix
4400 
4401   Options Database Keys:
4402 + -mat_no_inode                     - Do not use inodes
4403 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4404 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4405         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4406         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4407 
4408   Level: intermediate
4409 
4410   Notes:
4411   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4412   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4413   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4414 
4415   If the *_nnz parameter is given then the *_nz parameter is ignored
4416 
4417   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4418   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4419   storage requirements for this matrix.
4420 
4421   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4422   processor then it must be used on all processors that share the object for
4423   that argument.
4424 
4425   The user MUST specify either the local or global matrix dimensions
4426   (possibly both).
4427 
4428   The parallel matrix is partitioned across processors such that the
4429   first m0 rows belong to process 0, the next m1 rows belong to
4430   process 1, the next m2 rows belong to process 2 etc.. where
4431   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4432   values corresponding to [m x N] submatrix.
4433 
4434   The columns are logically partitioned with the n0 columns belonging
4435   to 0th partition, the next n1 columns belonging to the next
4436   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4437 
4438   The DIAGONAL portion of the local submatrix on any given processor
4439   is the submatrix corresponding to the rows and columns m,n
4440   corresponding to the given processor. i.e diagonal matrix on
4441   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4442   etc. The remaining portion of the local submatrix [m x (N-n)]
4443   constitute the OFF-DIAGONAL portion. The example below better
4444   illustrates this concept.
4445 
4446   For a square global matrix we define each processor's diagonal portion
4447   to be its local rows and the corresponding columns (a square submatrix);
4448   each processor's off-diagonal portion encompasses the remainder of the
4449   local matrix (a rectangular submatrix).
4450 
4451   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4452 
4453   When calling this routine with a single process communicator, a matrix of
4454   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4455   type of communicator, use the construction mechanism
4456 .vb
4457   MatCreate(..., &A);
4458   MatSetType(A, MATMPIAIJ);
4459   MatSetSizes(A, m, n, M, N);
4460   MatMPIAIJSetPreallocation(A, ...);
4461 .ve
4462 
4463   By default, this format uses inodes (identical nodes) when possible.
4464   We search for consecutive rows with the same nonzero structure, thereby
4465   reusing matrix information to achieve increased efficiency.
4466 
4467   Example Usage:
4468   Consider the following 8x8 matrix with 34 non-zero values, that is
4469   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4470   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4471   as follows
4472 
4473 .vb
4474             1  2  0  |  0  3  0  |  0  4
4475     Proc0   0  5  6  |  7  0  0  |  8  0
4476             9  0 10  | 11  0  0  | 12  0
4477     -------------------------------------
4478            13  0 14  | 15 16 17  |  0  0
4479     Proc1   0 18  0  | 19 20 21  |  0  0
4480             0  0  0  | 22 23  0  | 24  0
4481     -------------------------------------
4482     Proc2  25 26 27  |  0  0 28  | 29  0
4483            30  0  0  | 31 32 33  |  0 34
4484 .ve
4485 
4486   This can be represented as a collection of submatrices as
4487 
4488 .vb
4489       A B C
4490       D E F
4491       G H I
4492 .ve
4493 
4494   Where the submatrices A,B,C are owned by proc0, D,E,F are
4495   owned by proc1, G,H,I are owned by proc2.
4496 
4497   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4498   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4499   The 'M','N' parameters are 8,8, and have the same values on all procs.
4500 
4501   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4502   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4503   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4504   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4505   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4506   matrix, and [DF] as another `MATSEQAIJ` matrix.
4507 
4508   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4509   allocated for every row of the local diagonal submatrix, and `o_nz`
4510   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4511   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4512   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4513   In this case, the values of `d_nz`,`o_nz` are
4514 .vb
4515      proc0  d_nz = 2, o_nz = 2
4516      proc1  d_nz = 3, o_nz = 2
4517      proc2  d_nz = 1, o_nz = 4
4518 .ve
4519   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4520   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4521   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4522   34 values.
4523 
4524   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4525   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4526   In the above case the values for d_nnz,o_nnz are
4527 .vb
4528      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4529      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4530      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4531 .ve
4532   Here the space allocated is sum of all the above values i.e 34, and
4533   hence pre-allocation is perfect.
4534 
4535 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4536           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4537 @*/
4538 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4539 {
4540   PetscMPIInt size;
4541 
4542   PetscFunctionBegin;
4543   PetscCall(MatCreate(comm, A));
4544   PetscCall(MatSetSizes(*A, m, n, M, N));
4545   PetscCallMPI(MPI_Comm_size(comm, &size));
4546   if (size > 1) {
4547     PetscCall(MatSetType(*A, MATMPIAIJ));
4548     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4549   } else {
4550     PetscCall(MatSetType(*A, MATSEQAIJ));
4551     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4552   }
4553   PetscFunctionReturn(PETSC_SUCCESS);
4554 }
4555 
4556 /*MC
4557     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4558 
4559     Synopsis:
4560     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4561 
4562     Not Collective
4563 
4564     Input Parameter:
4565 .   A - the `MATMPIAIJ` matrix
4566 
4567     Output Parameters:
4568 +   Ad - the diagonal portion of the matrix
4569 .   Ao - the off-diagonal portion of the matrix
4570 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4571 -   ierr - error code
4572 
4573      Level: advanced
4574 
4575     Note:
4576     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4577 
4578 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4579 M*/
4580 
4581 /*MC
4582     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4583 
4584     Synopsis:
4585     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4586 
4587     Not Collective
4588 
4589     Input Parameters:
4590 +   A - the `MATMPIAIJ` matrix
4591 .   Ad - the diagonal portion of the matrix
4592 .   Ao - the off-diagonal portion of the matrix
4593 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4594 -   ierr - error code
4595 
4596      Level: advanced
4597 
4598 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4599 M*/
4600 
4601 /*@C
4602   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4603 
4604   Not Collective
4605 
4606   Input Parameter:
4607 . A - The `MATMPIAIJ` matrix
4608 
4609   Output Parameters:
4610 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4611 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4612 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4613 
4614   Level: intermediate
4615 
4616   Note:
4617   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4618   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
4619   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4620   local column numbers to global column numbers in the original matrix.
4621 
4622   Fortran Notes:
4623   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4624 
4625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4626 @*/
4627 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4628 {
4629   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4630   PetscBool   flg;
4631 
4632   PetscFunctionBegin;
4633   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4634   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4635   if (Ad) *Ad = a->A;
4636   if (Ao) *Ao = a->B;
4637   if (colmap) *colmap = a->garray;
4638   PetscFunctionReturn(PETSC_SUCCESS);
4639 }
4640 
4641 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4642 {
4643   PetscInt     m, N, i, rstart, nnz, Ii;
4644   PetscInt    *indx;
4645   PetscScalar *values;
4646   MatType      rootType;
4647 
4648   PetscFunctionBegin;
4649   PetscCall(MatGetSize(inmat, &m, &N));
4650   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4651     PetscInt *dnz, *onz, sum, bs, cbs;
4652 
4653     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4654     /* Check sum(n) = N */
4655     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4656     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4657 
4658     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4659     rstart -= m;
4660 
4661     MatPreallocateBegin(comm, m, n, dnz, onz);
4662     for (i = 0; i < m; i++) {
4663       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4664       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4665       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4666     }
4667 
4668     PetscCall(MatCreate(comm, outmat));
4669     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4670     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4671     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4672     PetscCall(MatGetRootType_Private(inmat, &rootType));
4673     PetscCall(MatSetType(*outmat, rootType));
4674     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4675     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4676     MatPreallocateEnd(dnz, onz);
4677     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4678   }
4679 
4680   /* numeric phase */
4681   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4682   for (i = 0; i < m; i++) {
4683     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4684     Ii = i + rstart;
4685     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4686     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4687   }
4688   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4689   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4690   PetscFunctionReturn(PETSC_SUCCESS);
4691 }
4692 
4693 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4694 {
4695   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4696 
4697   PetscFunctionBegin;
4698   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4699   PetscCall(PetscFree(merge->id_r));
4700   PetscCall(PetscFree(merge->len_s));
4701   PetscCall(PetscFree(merge->len_r));
4702   PetscCall(PetscFree(merge->bi));
4703   PetscCall(PetscFree(merge->bj));
4704   PetscCall(PetscFree(merge->buf_ri[0]));
4705   PetscCall(PetscFree(merge->buf_ri));
4706   PetscCall(PetscFree(merge->buf_rj[0]));
4707   PetscCall(PetscFree(merge->buf_rj));
4708   PetscCall(PetscFree(merge->coi));
4709   PetscCall(PetscFree(merge->coj));
4710   PetscCall(PetscFree(merge->owners_co));
4711   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4712   PetscCall(PetscFree(merge));
4713   PetscFunctionReturn(PETSC_SUCCESS);
4714 }
4715 
4716 #include <../src/mat/utils/freespace.h>
4717 #include <petscbt.h>
4718 
4719 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4720 {
4721   MPI_Comm             comm;
4722   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4723   PetscMPIInt          size, rank, taga, *len_s;
4724   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4725   PetscInt             proc, m;
4726   PetscInt           **buf_ri, **buf_rj;
4727   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4728   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4729   MPI_Request         *s_waits, *r_waits;
4730   MPI_Status          *status;
4731   const MatScalar     *aa, *a_a;
4732   MatScalar          **abuf_r, *ba_i;
4733   Mat_Merge_SeqsToMPI *merge;
4734   PetscContainer       container;
4735 
4736   PetscFunctionBegin;
4737   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4738   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4739 
4740   PetscCallMPI(MPI_Comm_size(comm, &size));
4741   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4742 
4743   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4744   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4745   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4746   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4747   aa = a_a;
4748 
4749   bi     = merge->bi;
4750   bj     = merge->bj;
4751   buf_ri = merge->buf_ri;
4752   buf_rj = merge->buf_rj;
4753 
4754   PetscCall(PetscMalloc1(size, &status));
4755   owners = merge->rowmap->range;
4756   len_s  = merge->len_s;
4757 
4758   /* send and recv matrix values */
4759   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4760   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4761 
4762   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4763   for (proc = 0, k = 0; proc < size; proc++) {
4764     if (!len_s[proc]) continue;
4765     i = owners[proc];
4766     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4767     k++;
4768   }
4769 
4770   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4771   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4772   PetscCall(PetscFree(status));
4773 
4774   PetscCall(PetscFree(s_waits));
4775   PetscCall(PetscFree(r_waits));
4776 
4777   /* insert mat values of mpimat */
4778   PetscCall(PetscMalloc1(N, &ba_i));
4779   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4780 
4781   for (k = 0; k < merge->nrecv; k++) {
4782     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4783     nrows       = *buf_ri_k[k];
4784     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4785     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4786   }
4787 
4788   /* set values of ba */
4789   m = merge->rowmap->n;
4790   for (i = 0; i < m; i++) {
4791     arow = owners[rank] + i;
4792     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4793     bnzi = bi[i + 1] - bi[i];
4794     PetscCall(PetscArrayzero(ba_i, bnzi));
4795 
4796     /* add local non-zero vals of this proc's seqmat into ba */
4797     anzi   = ai[arow + 1] - ai[arow];
4798     aj     = a->j + ai[arow];
4799     aa     = a_a + ai[arow];
4800     nextaj = 0;
4801     for (j = 0; nextaj < anzi; j++) {
4802       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4803         ba_i[j] += aa[nextaj++];
4804       }
4805     }
4806 
4807     /* add received vals into ba */
4808     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4809       /* i-th row */
4810       if (i == *nextrow[k]) {
4811         anzi   = *(nextai[k] + 1) - *nextai[k];
4812         aj     = buf_rj[k] + *nextai[k];
4813         aa     = abuf_r[k] + *nextai[k];
4814         nextaj = 0;
4815         for (j = 0; nextaj < anzi; j++) {
4816           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4817             ba_i[j] += aa[nextaj++];
4818           }
4819         }
4820         nextrow[k]++;
4821         nextai[k]++;
4822       }
4823     }
4824     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4825   }
4826   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4827   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4828   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4829 
4830   PetscCall(PetscFree(abuf_r[0]));
4831   PetscCall(PetscFree(abuf_r));
4832   PetscCall(PetscFree(ba_i));
4833   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4834   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4835   PetscFunctionReturn(PETSC_SUCCESS);
4836 }
4837 
4838 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4839 {
4840   Mat                  B_mpi;
4841   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4842   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4843   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4844   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4845   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4846   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4847   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4848   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4849   MPI_Status          *status;
4850   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4851   PetscBT              lnkbt;
4852   Mat_Merge_SeqsToMPI *merge;
4853   PetscContainer       container;
4854 
4855   PetscFunctionBegin;
4856   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4857 
4858   /* make sure it is a PETSc comm */
4859   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4860   PetscCallMPI(MPI_Comm_size(comm, &size));
4861   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4862 
4863   PetscCall(PetscNew(&merge));
4864   PetscCall(PetscMalloc1(size, &status));
4865 
4866   /* determine row ownership */
4867   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4868   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4869   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4870   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4871   PetscCall(PetscLayoutSetUp(merge->rowmap));
4872   PetscCall(PetscMalloc1(size, &len_si));
4873   PetscCall(PetscMalloc1(size, &merge->len_s));
4874 
4875   m      = merge->rowmap->n;
4876   owners = merge->rowmap->range;
4877 
4878   /* determine the number of messages to send, their lengths */
4879   len_s = merge->len_s;
4880 
4881   len          = 0; /* length of buf_si[] */
4882   merge->nsend = 0;
4883   for (proc = 0; proc < size; proc++) {
4884     len_si[proc] = 0;
4885     if (proc == rank) {
4886       len_s[proc] = 0;
4887     } else {
4888       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4889       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4890     }
4891     if (len_s[proc]) {
4892       merge->nsend++;
4893       nrows = 0;
4894       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4895         if (ai[i + 1] > ai[i]) nrows++;
4896       }
4897       len_si[proc] = 2 * (nrows + 1);
4898       len += len_si[proc];
4899     }
4900   }
4901 
4902   /* determine the number and length of messages to receive for ij-structure */
4903   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4904   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4905 
4906   /* post the Irecv of j-structure */
4907   PetscCall(PetscCommGetNewTag(comm, &tagj));
4908   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4909 
4910   /* post the Isend of j-structure */
4911   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4912 
4913   for (proc = 0, k = 0; proc < size; proc++) {
4914     if (!len_s[proc]) continue;
4915     i = owners[proc];
4916     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4917     k++;
4918   }
4919 
4920   /* receives and sends of j-structure are complete */
4921   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4922   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4923 
4924   /* send and recv i-structure */
4925   PetscCall(PetscCommGetNewTag(comm, &tagi));
4926   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4927 
4928   PetscCall(PetscMalloc1(len + 1, &buf_s));
4929   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4930   for (proc = 0, k = 0; proc < size; proc++) {
4931     if (!len_s[proc]) continue;
4932     /* form outgoing message for i-structure:
4933          buf_si[0]:                 nrows to be sent
4934                [1:nrows]:           row index (global)
4935                [nrows+1:2*nrows+1]: i-structure index
4936     */
4937     nrows       = len_si[proc] / 2 - 1;
4938     buf_si_i    = buf_si + nrows + 1;
4939     buf_si[0]   = nrows;
4940     buf_si_i[0] = 0;
4941     nrows       = 0;
4942     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4943       anzi = ai[i + 1] - ai[i];
4944       if (anzi) {
4945         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4946         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4947         nrows++;
4948       }
4949     }
4950     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4951     k++;
4952     buf_si += len_si[proc];
4953   }
4954 
4955   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4956   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4957 
4958   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4959   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4960 
4961   PetscCall(PetscFree(len_si));
4962   PetscCall(PetscFree(len_ri));
4963   PetscCall(PetscFree(rj_waits));
4964   PetscCall(PetscFree2(si_waits, sj_waits));
4965   PetscCall(PetscFree(ri_waits));
4966   PetscCall(PetscFree(buf_s));
4967   PetscCall(PetscFree(status));
4968 
4969   /* compute a local seq matrix in each processor */
4970   /* allocate bi array and free space for accumulating nonzero column info */
4971   PetscCall(PetscMalloc1(m + 1, &bi));
4972   bi[0] = 0;
4973 
4974   /* create and initialize a linked list */
4975   nlnk = N + 1;
4976   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4977 
4978   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4979   len = ai[owners[rank + 1]] - ai[owners[rank]];
4980   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4981 
4982   current_space = free_space;
4983 
4984   /* determine symbolic info for each local row */
4985   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4986 
4987   for (k = 0; k < merge->nrecv; k++) {
4988     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4989     nrows       = *buf_ri_k[k];
4990     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4991     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4992   }
4993 
4994   MatPreallocateBegin(comm, m, n, dnz, onz);
4995   len = 0;
4996   for (i = 0; i < m; i++) {
4997     bnzi = 0;
4998     /* add local non-zero cols of this proc's seqmat into lnk */
4999     arow = owners[rank] + i;
5000     anzi = ai[arow + 1] - ai[arow];
5001     aj   = a->j + ai[arow];
5002     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5003     bnzi += nlnk;
5004     /* add received col data into lnk */
5005     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5006       if (i == *nextrow[k]) {            /* i-th row */
5007         anzi = *(nextai[k] + 1) - *nextai[k];
5008         aj   = buf_rj[k] + *nextai[k];
5009         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5010         bnzi += nlnk;
5011         nextrow[k]++;
5012         nextai[k]++;
5013       }
5014     }
5015     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5016 
5017     /* if free space is not available, make more free space */
5018     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5019     /* copy data into free space, then initialize lnk */
5020     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5021     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5022 
5023     current_space->array += bnzi;
5024     current_space->local_used += bnzi;
5025     current_space->local_remaining -= bnzi;
5026 
5027     bi[i + 1] = bi[i] + bnzi;
5028   }
5029 
5030   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5031 
5032   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5033   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5034   PetscCall(PetscLLDestroy(lnk, lnkbt));
5035 
5036   /* create symbolic parallel matrix B_mpi */
5037   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5038   PetscCall(MatCreate(comm, &B_mpi));
5039   if (n == PETSC_DECIDE) {
5040     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5041   } else {
5042     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5043   }
5044   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5045   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5046   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5047   MatPreallocateEnd(dnz, onz);
5048   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5049 
5050   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5051   B_mpi->assembled = PETSC_FALSE;
5052   merge->bi        = bi;
5053   merge->bj        = bj;
5054   merge->buf_ri    = buf_ri;
5055   merge->buf_rj    = buf_rj;
5056   merge->coi       = NULL;
5057   merge->coj       = NULL;
5058   merge->owners_co = NULL;
5059 
5060   PetscCall(PetscCommDestroy(&comm));
5061 
5062   /* attach the supporting struct to B_mpi for reuse */
5063   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5064   PetscCall(PetscContainerSetPointer(container, merge));
5065   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5066   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5067   PetscCall(PetscContainerDestroy(&container));
5068   *mpimat = B_mpi;
5069 
5070   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5071   PetscFunctionReturn(PETSC_SUCCESS);
5072 }
5073 
5074 /*@C
5075   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5076   matrices from each processor
5077 
5078   Collective
5079 
5080   Input Parameters:
5081 + comm   - the communicators the parallel matrix will live on
5082 . seqmat - the input sequential matrices
5083 . m      - number of local rows (or `PETSC_DECIDE`)
5084 . n      - number of local columns (or `PETSC_DECIDE`)
5085 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5086 
5087   Output Parameter:
5088 . mpimat - the parallel matrix generated
5089 
5090   Level: advanced
5091 
5092   Note:
5093   The dimensions of the sequential matrix in each processor MUST be the same.
5094   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5095   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5096 
5097 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5098 @*/
5099 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5100 {
5101   PetscMPIInt size;
5102 
5103   PetscFunctionBegin;
5104   PetscCallMPI(MPI_Comm_size(comm, &size));
5105   if (size == 1) {
5106     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5107     if (scall == MAT_INITIAL_MATRIX) {
5108       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5109     } else {
5110       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5111     }
5112     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5113     PetscFunctionReturn(PETSC_SUCCESS);
5114   }
5115   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5116   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5117   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5118   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5119   PetscFunctionReturn(PETSC_SUCCESS);
5120 }
5121 
5122 /*@
5123   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5124 
5125   Not Collective
5126 
5127   Input Parameter:
5128 . A - the matrix
5129 
5130   Output Parameter:
5131 . A_loc - the local sequential matrix generated
5132 
5133   Level: developer
5134 
5135   Notes:
5136   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5137   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5138   `n` is the global column count obtained with `MatGetSize()`
5139 
5140   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5141 
5142   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5143 
5144   Destroy the matrix with `MatDestroy()`
5145 
5146 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5147 @*/
5148 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5149 {
5150   PetscBool mpi;
5151 
5152   PetscFunctionBegin;
5153   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5154   if (mpi) {
5155     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5156   } else {
5157     *A_loc = A;
5158     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5159   }
5160   PetscFunctionReturn(PETSC_SUCCESS);
5161 }
5162 
5163 /*@
5164   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5165 
5166   Not Collective
5167 
5168   Input Parameters:
5169 + A     - the matrix
5170 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5171 
5172   Output Parameter:
5173 . A_loc - the local sequential matrix generated
5174 
5175   Level: developer
5176 
5177   Notes:
5178   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5179   matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
5180   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5181 
5182   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5183 
5184   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5185   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5186   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5187   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5188 
5189 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5190 @*/
5191 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5192 {
5193   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5194   Mat_SeqAIJ        *mat, *a, *b;
5195   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5196   const PetscScalar *aa, *ba, *aav, *bav;
5197   PetscScalar       *ca, *cam;
5198   PetscMPIInt        size;
5199   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5200   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5201   PetscBool          match;
5202 
5203   PetscFunctionBegin;
5204   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5205   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5206   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5207   if (size == 1) {
5208     if (scall == MAT_INITIAL_MATRIX) {
5209       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5210       *A_loc = mpimat->A;
5211     } else if (scall == MAT_REUSE_MATRIX) {
5212       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5213     }
5214     PetscFunctionReturn(PETSC_SUCCESS);
5215   }
5216 
5217   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5218   a  = (Mat_SeqAIJ *)mpimat->A->data;
5219   b  = (Mat_SeqAIJ *)mpimat->B->data;
5220   ai = a->i;
5221   aj = a->j;
5222   bi = b->i;
5223   bj = b->j;
5224   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5225   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5226   aa = aav;
5227   ba = bav;
5228   if (scall == MAT_INITIAL_MATRIX) {
5229     PetscCall(PetscMalloc1(1 + am, &ci));
5230     ci[0] = 0;
5231     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5232     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5233     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5234     k = 0;
5235     for (i = 0; i < am; i++) {
5236       ncols_o = bi[i + 1] - bi[i];
5237       ncols_d = ai[i + 1] - ai[i];
5238       /* off-diagonal portion of A */
5239       for (jo = 0; jo < ncols_o; jo++) {
5240         col = cmap[*bj];
5241         if (col >= cstart) break;
5242         cj[k] = col;
5243         bj++;
5244         ca[k++] = *ba++;
5245       }
5246       /* diagonal portion of A */
5247       for (j = 0; j < ncols_d; j++) {
5248         cj[k]   = cstart + *aj++;
5249         ca[k++] = *aa++;
5250       }
5251       /* off-diagonal portion of A */
5252       for (j = jo; j < ncols_o; j++) {
5253         cj[k]   = cmap[*bj++];
5254         ca[k++] = *ba++;
5255       }
5256     }
5257     /* put together the new matrix */
5258     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5259     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5260     /* Since these are PETSc arrays, change flags to free them as necessary. */
5261     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5262     mat->free_a  = PETSC_TRUE;
5263     mat->free_ij = PETSC_TRUE;
5264     mat->nonew   = 0;
5265   } else if (scall == MAT_REUSE_MATRIX) {
5266     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5267     ci  = mat->i;
5268     cj  = mat->j;
5269     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5270     for (i = 0; i < am; i++) {
5271       /* off-diagonal portion of A */
5272       ncols_o = bi[i + 1] - bi[i];
5273       for (jo = 0; jo < ncols_o; jo++) {
5274         col = cmap[*bj];
5275         if (col >= cstart) break;
5276         *cam++ = *ba++;
5277         bj++;
5278       }
5279       /* diagonal portion of A */
5280       ncols_d = ai[i + 1] - ai[i];
5281       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5282       /* off-diagonal portion of A */
5283       for (j = jo; j < ncols_o; j++) {
5284         *cam++ = *ba++;
5285         bj++;
5286       }
5287     }
5288     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5289   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5290   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5291   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5292   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5293   PetscFunctionReturn(PETSC_SUCCESS);
5294 }
5295 
5296 /*@
5297   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5298   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5299 
5300   Not Collective
5301 
5302   Input Parameters:
5303 + A     - the matrix
5304 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5305 
5306   Output Parameters:
5307 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5308 - A_loc - the local sequential matrix generated
5309 
5310   Level: developer
5311 
5312   Note:
5313   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5314   part, then those associated with the off-diagonal part (in its local ordering)
5315 
5316 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5317 @*/
5318 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5319 {
5320   Mat             Ao, Ad;
5321   const PetscInt *cmap;
5322   PetscMPIInt     size;
5323   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5324 
5325   PetscFunctionBegin;
5326   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5327   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5328   if (size == 1) {
5329     if (scall == MAT_INITIAL_MATRIX) {
5330       PetscCall(PetscObjectReference((PetscObject)Ad));
5331       *A_loc = Ad;
5332     } else if (scall == MAT_REUSE_MATRIX) {
5333       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5334     }
5335     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5336     PetscFunctionReturn(PETSC_SUCCESS);
5337   }
5338   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5339   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5340   if (f) {
5341     PetscCall((*f)(A, scall, glob, A_loc));
5342   } else {
5343     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5344     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5345     Mat_SeqAIJ        *c;
5346     PetscInt          *ai = a->i, *aj = a->j;
5347     PetscInt          *bi = b->i, *bj = b->j;
5348     PetscInt          *ci, *cj;
5349     const PetscScalar *aa, *ba;
5350     PetscScalar       *ca;
5351     PetscInt           i, j, am, dn, on;
5352 
5353     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5354     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5355     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5356     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5357     if (scall == MAT_INITIAL_MATRIX) {
5358       PetscInt k;
5359       PetscCall(PetscMalloc1(1 + am, &ci));
5360       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5361       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5362       ci[0] = 0;
5363       for (i = 0, k = 0; i < am; i++) {
5364         const PetscInt ncols_o = bi[i + 1] - bi[i];
5365         const PetscInt ncols_d = ai[i + 1] - ai[i];
5366         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5367         /* diagonal portion of A */
5368         for (j = 0; j < ncols_d; j++, k++) {
5369           cj[k] = *aj++;
5370           ca[k] = *aa++;
5371         }
5372         /* off-diagonal portion of A */
5373         for (j = 0; j < ncols_o; j++, k++) {
5374           cj[k] = dn + *bj++;
5375           ca[k] = *ba++;
5376         }
5377       }
5378       /* put together the new matrix */
5379       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5380       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5381       /* Since these are PETSc arrays, change flags to free them as necessary. */
5382       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5383       c->free_a  = PETSC_TRUE;
5384       c->free_ij = PETSC_TRUE;
5385       c->nonew   = 0;
5386       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5387     } else if (scall == MAT_REUSE_MATRIX) {
5388       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5389       for (i = 0; i < am; i++) {
5390         const PetscInt ncols_d = ai[i + 1] - ai[i];
5391         const PetscInt ncols_o = bi[i + 1] - bi[i];
5392         /* diagonal portion of A */
5393         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5394         /* off-diagonal portion of A */
5395         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5396       }
5397       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5398     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5399     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5400     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5401     if (glob) {
5402       PetscInt cst, *gidx;
5403 
5404       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5405       PetscCall(PetscMalloc1(dn + on, &gidx));
5406       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5407       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5408       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5409     }
5410   }
5411   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5412   PetscFunctionReturn(PETSC_SUCCESS);
5413 }
5414 
5415 /*@C
5416   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5417 
5418   Not Collective
5419 
5420   Input Parameters:
5421 + A     - the matrix
5422 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5423 . row   - index set of rows to extract (or `NULL`)
5424 - col   - index set of columns to extract (or `NULL`)
5425 
5426   Output Parameter:
5427 . A_loc - the local sequential matrix generated
5428 
5429   Level: developer
5430 
5431 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5432 @*/
5433 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5434 {
5435   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5436   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5437   IS          isrowa, iscola;
5438   Mat        *aloc;
5439   PetscBool   match;
5440 
5441   PetscFunctionBegin;
5442   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5443   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5444   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5445   if (!row) {
5446     start = A->rmap->rstart;
5447     end   = A->rmap->rend;
5448     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5449   } else {
5450     isrowa = *row;
5451   }
5452   if (!col) {
5453     start = A->cmap->rstart;
5454     cmap  = a->garray;
5455     nzA   = a->A->cmap->n;
5456     nzB   = a->B->cmap->n;
5457     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5458     ncols = 0;
5459     for (i = 0; i < nzB; i++) {
5460       if (cmap[i] < start) idx[ncols++] = cmap[i];
5461       else break;
5462     }
5463     imark = i;
5464     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5465     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5466     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5467   } else {
5468     iscola = *col;
5469   }
5470   if (scall != MAT_INITIAL_MATRIX) {
5471     PetscCall(PetscMalloc1(1, &aloc));
5472     aloc[0] = *A_loc;
5473   }
5474   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5475   if (!col) { /* attach global id of condensed columns */
5476     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5477   }
5478   *A_loc = aloc[0];
5479   PetscCall(PetscFree(aloc));
5480   if (!row) PetscCall(ISDestroy(&isrowa));
5481   if (!col) PetscCall(ISDestroy(&iscola));
5482   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5483   PetscFunctionReturn(PETSC_SUCCESS);
5484 }
5485 
5486 /*
5487  * Create a sequential AIJ matrix based on row indices: for every index in rows, all nonzeros of that row of P are gathered.
5488  * A row could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5489  * on a global size.
      *
      * Input Parameters:
      *   P    - the parallel matrix; its data is accessed as Mat_MPIAIJ (diagonal block p->A, off-diagonal block p->B)
      *   rows - index set holding the global row indices to gather
      *
      * Output Parameter:
      *   P_oth - new SeqAIJ matrix with one row per entry of rows. Column indices are stored as global indices.
      *           The two PetscSF objects used for the gather are attached to it as "diagsf" and "offdiagsf".
      *
      * Note: the column index arrays of p->A and p->B are temporarily rewritten in place to global numbering
      * while they are being sent and are converted back to local numbering before the routine returns.
5490  * */
5491 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5492 {
5493   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5494   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5495   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5496   PetscMPIInt            owner;
5497   PetscSFNode           *iremote, *oiremote;
5498   const PetscInt        *lrowindices;
5499   PetscSF                sf, osf;
5500   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5501   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5502   MPI_Comm               comm;
5503   ISLocalToGlobalMapping mapping;
5504   const PetscScalar     *pd_a, *po_a;
5505
5506   PetscFunctionBegin;
5507   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5508   /* plocalsize is the number of roots
5509    * nrows is the number of leaves
5510    * */
5511   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5512   PetscCall(ISGetLocalSize(rows, &nrows));
5513   PetscCall(PetscCalloc1(nrows, &iremote));
5514   PetscCall(ISGetIndices(rows, &lrowindices));
5515   for (i = 0; i < nrows; i++) {
5516     /* Find a remote index and an owner for a row
5517      * The row could be local or remote
5518      * */
5519     owner = 0;
5520     lidx  = 0;
5521     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5522     iremote[i].index = lidx;
5523     iremote[i].rank  = owner;
5524   }
5525   /* Create SF to communicate how many nonzero columns for each row */
5526   PetscCall(PetscSFCreate(comm, &sf));
5527   /* SF will figure out the number of nonzero columns for each row, and their
5528    * offsets
5529    * */
       /* iremote is handed over with PETSC_OWN_POINTER; the variable is allocated again further down for the second graph */
5530   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5531   PetscCall(PetscSFSetFromOptions(sf));
5532   PetscCall(PetscSFSetUp(sf));
5533
       /* Per-row data is stored interleaved: entry 2*i is for the diagonal block, entry 2*i+1 for the off-diagonal block */
5534   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5535   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5536   PetscCall(PetscCalloc1(nrows, &pnnz));
5537   roffsets[0] = 0;
5538   roffsets[1] = 0;
5539   for (i = 0; i < plocalsize; i++) {
5540     /* diagonal */
5541     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5542     /* off-diagonal */
5543     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5544     /* compute offsets so that we know the relative location of each row */
5545     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5546     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5547   }
5548   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5549   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5550   /* 'r' means root, and 'l' means leaf */
       /* The unit is MPIU_2INT so that each (diagonal, off-diagonal) pair travels together; both broadcasts are started before either is completed */
5551   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5552   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5553   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5554   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5555   PetscCall(PetscSFDestroy(&sf));
5556   PetscCall(PetscFree(roffsets));
5557   PetscCall(PetscFree(nrcols));
       /* First pass over the requested rows: total entry counts per block and the longest row (ncol) */
5558   dntotalcols = 0;
5559   ontotalcols = 0;
5560   ncol        = 0;
5561   for (i = 0; i < nrows; i++) {
5562     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5563     ncol    = PetscMax(pnnz[i], ncol);
5564     /* diagonal */
5565     dntotalcols += nlcols[i * 2 + 0];
5566     /* off-diagonal */
5567     ontotalcols += nlcols[i * 2 + 1];
5568   }
5569   /* We do not need to figure the right number of columns
5570    * since all the calculations will be done by going through the raw data
5571    * */
5572   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5573   PetscCall(MatSetUp(*P_oth));
5574   PetscCall(PetscFree(pnnz));
5575   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5576   /* diagonal */
5577   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5578   /* off-diagonal */
5579   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5580   /* diagonal */
5581   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5582   /* off-diagonal */
5583   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
       /* The totals are reset and reused as running cursors; ntotalcols is the position inside P_oth's entry arrays */
5584   dntotalcols = 0;
5585   ontotalcols = 0;
5586   ntotalcols  = 0;
5587   for (i = 0; i < nrows; i++) {
5588     owner = 0;
5589     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5590     /* Set iremote for diag matrix */
5591     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5592       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5593       iremote[dntotalcols].rank  = owner;
5594       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5595       ilocal[dntotalcols++] = ntotalcols++;
5596     }
5597     /* off-diagonal */
         /* these entries follow the diagonal ones of the same row, so within a row of P_oth the diagonal entries come first */
5598     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5599       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5600       oiremote[ontotalcols].rank  = owner;
5601       oilocal[ontotalcols++]      = ntotalcols++;
5602     }
5603   }
5604   PetscCall(ISRestoreIndices(rows, &lrowindices));
5605   PetscCall(PetscFree(loffsets));
5606   PetscCall(PetscFree(nlcols));
5607   PetscCall(PetscSFCreate(comm, &sf));
5608   /* P serves as roots and P_oth is leaves
5609    * Diag matrix
5610    * */
5611   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5612   PetscCall(PetscSFSetFromOptions(sf));
5613   PetscCall(PetscSFSetUp(sf));
5614
5615   PetscCall(PetscSFCreate(comm, &osf));
5616   /* off-diagonal */
5617   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5618   PetscCall(PetscSFSetFromOptions(osf));
5619   PetscCall(PetscSFSetUp(osf));
5620   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5621   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5622   /* operate on the matrix internal data to save memory */
       /* Both SFs write into the same p_oth->a; their leaf positions (ilocal / oilocal) were drawn from the single counter ntotalcols above */
5623   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5624   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5625   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5626   /* Convert to global indices for diag matrix */
       /* pd->j is modified in place here and shifted back right after the matching PetscSFBcastEnd() below */
5627   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5628   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5629   /* We want P_oth store global indices */
5630   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5631   /* Use memory scalable approach */
5632   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
       /* po->j is likewise converted in place (input and output array are the same) */
5633   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5634   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5635   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5636   /* Convert back to local indices */
5637   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5638   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
       /* Map po->j back to local numbering in place; the check below fails if the number of mapped indices differs from the number of entries */
5639   nout = 0;
5640   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5641   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5642   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5643   /* Exchange values */
5644   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5645   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5646   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5647   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5648   /* Stop PETSc from shrinking memory */
       /* every row is marked as completely filled (ilen set to imax) because the entries were written directly into the arrays */
5649   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5650   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5651   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5652   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5653   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5654   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
       /* release the local handles; NOTE(review): presumably the compose calls above hold their own references - confirm */
5655   PetscCall(PetscSFDestroy(&sf));
5656   PetscCall(PetscSFDestroy(&osf));
5657   PetscFunctionReturn(PETSC_SUCCESS);
5658 }
5659 
5660 /*
5661  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5662  * This supports MPIAIJ and MAIJ
5663  * */
5664 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5665 {
5666   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5667   Mat_SeqAIJ *p_oth;
5668   IS          rows, map;
5669   PetscHMapI  hamp;
5670   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5671   MPI_Comm    comm;
5672   PetscSF     sf, osf;
5673   PetscBool   has;
5674 
5675   PetscFunctionBegin;
5676   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5677   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5678   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5679    *  and then create a submatrix (that often is an overlapping matrix)
5680    * */
5681   if (reuse == MAT_INITIAL_MATRIX) {
5682     /* Use a hash table to figure out unique keys */
5683     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5684     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5685     count = 0;
5686     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5687     for (i = 0; i < a->B->cmap->n; i++) {
5688       key = a->garray[i] / dof;
5689       PetscCall(PetscHMapIHas(hamp, key, &has));
5690       if (!has) {
5691         mapping[i] = count;
5692         PetscCall(PetscHMapISet(hamp, key, count++));
5693       } else {
5694         /* Current 'i' has the same value the previous step */
5695         mapping[i] = count - 1;
5696       }
5697     }
5698     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5699     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5700     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5701     PetscCall(PetscCalloc1(htsize, &rowindices));
5702     off = 0;
5703     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5704     PetscCall(PetscHMapIDestroy(&hamp));
5705     PetscCall(PetscSortInt(htsize, rowindices));
5706     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5707     /* In case, the matrix was already created but users want to recreate the matrix */
5708     PetscCall(MatDestroy(P_oth));
5709     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5710     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5711     PetscCall(ISDestroy(&map));
5712     PetscCall(ISDestroy(&rows));
5713   } else if (reuse == MAT_REUSE_MATRIX) {
5714     /* If matrix was already created, we simply update values using SF objects
5715      * that as attached to the matrix earlier.
5716      */
5717     const PetscScalar *pd_a, *po_a;
5718 
5719     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5720     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5721     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5722     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5723     /* Update values in place */
5724     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5725     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5726     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5727     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5728     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5729     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5730     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5731     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5732   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5733   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5734   PetscFunctionReturn(PETSC_SUCCESS);
5735 }
5736 
5737 /*@C
5738   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5739 
5740   Collective
5741 
5742   Input Parameters:
5743 + A     - the first matrix in `MATMPIAIJ` format
5744 . B     - the second matrix in `MATMPIAIJ` format
5745 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5746 
5747   Output Parameters:
5748 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5749 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5750 - B_seq - the sequential matrix generated
5751 
5752   Level: developer
5753 
5754 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5755 @*/
5756 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5757 {
5758   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5759   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5760   IS          isrowb, iscolb;
5761   Mat        *bseq = NULL;
5762 
5763   PetscFunctionBegin;
5764   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5765              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5766   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5767 
5768   if (scall == MAT_INITIAL_MATRIX) {
5769     start = A->cmap->rstart;
5770     cmap  = a->garray;
5771     nzA   = a->A->cmap->n;
5772     nzB   = a->B->cmap->n;
5773     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5774     ncols = 0;
5775     for (i = 0; i < nzB; i++) { /* row < local row index */
5776       if (cmap[i] < start) idx[ncols++] = cmap[i];
5777       else break;
5778     }
5779     imark = i;
5780     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5781     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5782     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5783     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5784   } else {
5785     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5786     isrowb = *rowb;
5787     iscolb = *colb;
5788     PetscCall(PetscMalloc1(1, &bseq));
5789     bseq[0] = *B_seq;
5790   }
5791   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5792   *B_seq = bseq[0];
5793   PetscCall(PetscFree(bseq));
5794   if (!rowb) {
5795     PetscCall(ISDestroy(&isrowb));
5796   } else {
5797     *rowb = isrowb;
5798   }
5799   if (!colb) {
5800     PetscCall(ISDestroy(&iscolb));
5801   } else {
5802     *colb = iscolb;
5803   }
5804   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5805   PetscFunctionReturn(PETSC_SUCCESS);
5806 }
5807 
5808 /*
5809     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5810     of the OFF-DIAGONAL portion of local A
5811 
5812     Collective
5813 
5814    Input Parameters:
5815 +    A,B - the matrices in `MATMPIAIJ` format
5816 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5817 
5818    Output Parameter:
5819 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5820 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5821 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5822 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5823 
5824     Developer Note:
5825     This directly accesses information inside the VecScatter associated with the matrix-vector product
5826      for this matrix. This is not desirable..
5827 
5828     Level: developer
5829 
5830 */
5831 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5832 {
5833   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5834   Mat_SeqAIJ        *b_oth;
5835   VecScatter         ctx;
5836   MPI_Comm           comm;
5837   const PetscMPIInt *rprocs, *sprocs;
5838   const PetscInt    *srow, *rstarts, *sstarts;
5839   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5840   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5841   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5842   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5843   PetscMPIInt        size, tag, rank, nreqs;
5844 
5845   PetscFunctionBegin;
5846   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5847   PetscCallMPI(MPI_Comm_size(comm, &size));
5848 
5849   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5850              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5851   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5852   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5853 
5854   if (size == 1) {
5855     startsj_s = NULL;
5856     bufa_ptr  = NULL;
5857     *B_oth    = NULL;
5858     PetscFunctionReturn(PETSC_SUCCESS);
5859   }
5860 
5861   ctx = a->Mvctx;
5862   tag = ((PetscObject)ctx)->tag;
5863 
5864   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5865   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5866   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5867   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5868   PetscCall(PetscMalloc1(nreqs, &reqs));
5869   rwaits = reqs;
5870   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5871 
5872   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5873   if (scall == MAT_INITIAL_MATRIX) {
5874     /* i-array */
5875     /*  post receives */
5876     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5877     for (i = 0; i < nrecvs; i++) {
5878       rowlen = rvalues + rstarts[i] * rbs;
5879       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5880       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5881     }
5882 
5883     /* pack the outgoing message */
5884     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5885 
5886     sstartsj[0] = 0;
5887     rstartsj[0] = 0;
5888     len         = 0; /* total length of j or a array to be sent */
5889     if (nsends) {
5890       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5891       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5892     }
5893     for (i = 0; i < nsends; i++) {
5894       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5895       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5896       for (j = 0; j < nrows; j++) {
5897         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5898         for (l = 0; l < sbs; l++) {
5899           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5900 
5901           rowlen[j * sbs + l] = ncols;
5902 
5903           len += ncols;
5904           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5905         }
5906         k++;
5907       }
5908       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5909 
5910       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5911     }
5912     /* recvs and sends of i-array are completed */
5913     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5914     PetscCall(PetscFree(svalues));
5915 
5916     /* allocate buffers for sending j and a arrays */
5917     PetscCall(PetscMalloc1(len + 1, &bufj));
5918     PetscCall(PetscMalloc1(len + 1, &bufa));
5919 
5920     /* create i-array of B_oth */
5921     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5922 
5923     b_othi[0] = 0;
5924     len       = 0; /* total length of j or a array to be received */
5925     k         = 0;
5926     for (i = 0; i < nrecvs; i++) {
5927       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5928       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5929       for (j = 0; j < nrows; j++) {
5930         b_othi[k + 1] = b_othi[k] + rowlen[j];
5931         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5932         k++;
5933       }
5934       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5935     }
5936     PetscCall(PetscFree(rvalues));
5937 
5938     /* allocate space for j and a arrays of B_oth */
5939     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5940     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5941 
5942     /* j-array */
5943     /*  post receives of j-array */
5944     for (i = 0; i < nrecvs; i++) {
5945       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5946       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5947     }
5948 
5949     /* pack the outgoing message j-array */
5950     if (nsends) k = sstarts[0];
5951     for (i = 0; i < nsends; i++) {
5952       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5953       bufJ  = bufj + sstartsj[i];
5954       for (j = 0; j < nrows; j++) {
5955         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5956         for (ll = 0; ll < sbs; ll++) {
5957           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5958           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5959           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5960         }
5961       }
5962       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5963     }
5964 
5965     /* recvs and sends of j-array are completed */
5966     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5967   } else if (scall == MAT_REUSE_MATRIX) {
5968     sstartsj = *startsj_s;
5969     rstartsj = *startsj_r;
5970     bufa     = *bufa_ptr;
5971     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5972     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5973   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5974 
5975   /* a-array */
5976   /*  post receives of a-array */
5977   for (i = 0; i < nrecvs; i++) {
5978     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5979     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5980   }
5981 
5982   /* pack the outgoing message a-array */
5983   if (nsends) k = sstarts[0];
5984   for (i = 0; i < nsends; i++) {
5985     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5986     bufA  = bufa + sstartsj[i];
5987     for (j = 0; j < nrows; j++) {
5988       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5989       for (ll = 0; ll < sbs; ll++) {
5990         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5991         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5992         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5993       }
5994     }
5995     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5996   }
5997   /* recvs and sends of a-array are completed */
5998   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5999   PetscCall(PetscFree(reqs));
6000 
6001   if (scall == MAT_INITIAL_MATRIX) {
6002     /* put together the new matrix */
6003     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6004 
6005     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6006     /* Since these are PETSc arrays, change flags to free them as necessary. */
6007     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6008     b_oth->free_a  = PETSC_TRUE;
6009     b_oth->free_ij = PETSC_TRUE;
6010     b_oth->nonew   = 0;
6011 
6012     PetscCall(PetscFree(bufj));
6013     if (!startsj_s || !bufa_ptr) {
6014       PetscCall(PetscFree2(sstartsj, rstartsj));
6015       PetscCall(PetscFree(bufa_ptr));
6016     } else {
6017       *startsj_s = sstartsj;
6018       *startsj_r = rstartsj;
6019       *bufa_ptr  = bufa;
6020     }
6021   } else if (scall == MAT_REUSE_MATRIX) {
6022     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6023   }
6024 
6025   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6026   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6027   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6028   PetscFunctionReturn(PETSC_SUCCESS);
6029 }
6030 
6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6034 #if defined(PETSC_HAVE_MKL_SPARSE)
6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6036 #endif
6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6039 #if defined(PETSC_HAVE_ELEMENTAL)
6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6041 #endif
6042 #if defined(PETSC_HAVE_SCALAPACK)
6043 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6044 #endif
6045 #if defined(PETSC_HAVE_HYPRE)
6046 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6047 #endif
6048 #if defined(PETSC_HAVE_CUDA)
6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6050 #endif
6051 #if defined(PETSC_HAVE_HIP)
6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6053 #endif
6054 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6056 #endif
6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6058 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6059 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6060 
6061 /*
6062     Computes (B'*A')' since computing B*A directly is untenable
6063 
6064                n                       p                          p
6065         [             ]       [             ]         [                 ]
6066       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6067         [             ]       [             ]         [                 ]
6068 
6069 */
6070 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6071 {
6072   Mat At, Bt, Ct;
6073 
6074   PetscFunctionBegin;
6075   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6076   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6077   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6078   PetscCall(MatDestroy(&At));
6079   PetscCall(MatDestroy(&Bt));
6080   PetscCall(MatTransposeSetPrecursor(Ct, C));
6081   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6082   PetscCall(MatDestroy(&Ct));
6083   PetscFunctionReturn(PETSC_SUCCESS);
6084 }
6085 
6086 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6087 {
6088   PetscBool cisdense;
6089 
6090   PetscFunctionBegin;
6091   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6092   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6093   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6094   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6095   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6096   PetscCall(MatSetUp(C));
6097 
6098   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6099   PetscFunctionReturn(PETSC_SUCCESS);
6100 }
6101 
6102 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6103 {
6104   Mat_Product *product = C->product;
6105   Mat          A = product->A, B = product->B;
6106 
6107   PetscFunctionBegin;
6108   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6109              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6110   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6111   C->ops->productsymbolic = MatProductSymbolic_AB;
6112   PetscFunctionReturn(PETSC_SUCCESS);
6113 }
6114 
6115 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6116 {
6117   Mat_Product *product = C->product;
6118 
6119   PetscFunctionBegin;
6120   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6121   PetscFunctionReturn(PETSC_SUCCESS);
6122 }
6123 
6124 /*
6125    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6126 
6127   Input Parameters:
6128 
6129     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6130     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6131 
6132     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6133 
6134     For Set1, j1[] contains column indices of the nonzeros.
6135     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6136     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6137     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6138 
6139     Similar for Set2.
6140 
6141     This routine merges the two sets of nonzeros row by row and removes repeats.
6142 
6143   Output Parameters: (memory is allocated by the caller)
6144 
6145     i[],j[]: the CSR of the merged matrix, which has m rows.
6146     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6147     imap2[]: similar to imap1[], but for Set2.
6148     Note we order nonzeros row-by-row and from left to right.
6149 */
6150 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6151 {
6152   PetscInt   r, m; /* Row index of mat */
6153   PetscCount t, t1, t2, b1, e1, b2, e2;
6154 
6155   PetscFunctionBegin;
6156   PetscCall(MatGetLocalSize(mat, &m, NULL));
6157   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6158   i[0]        = 0;
6159   for (r = 0; r < m; r++) { /* Do row by row merging */
6160     b1 = rowBegin1[r];
6161     e1 = rowEnd1[r];
6162     b2 = rowBegin2[r];
6163     e2 = rowEnd2[r];
6164     while (b1 < e1 && b2 < e2) {
6165       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6166         j[t]      = j1[b1];
6167         imap1[t1] = t;
6168         imap2[t2] = t;
6169         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6170         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6171         t1++;
6172         t2++;
6173         t++;
6174       } else if (j1[b1] < j2[b2]) {
6175         j[t]      = j1[b1];
6176         imap1[t1] = t;
6177         b1 += jmap1[t1 + 1] - jmap1[t1];
6178         t1++;
6179         t++;
6180       } else {
6181         j[t]      = j2[b2];
6182         imap2[t2] = t;
6183         b2 += jmap2[t2 + 1] - jmap2[t2];
6184         t2++;
6185         t++;
6186       }
6187     }
6188     /* Merge the remaining in either j1[] or j2[] */
6189     while (b1 < e1) {
6190       j[t]      = j1[b1];
6191       imap1[t1] = t;
6192       b1 += jmap1[t1 + 1] - jmap1[t1];
6193       t1++;
6194       t++;
6195     }
6196     while (b2 < e2) {
6197       j[t]      = j2[b2];
6198       imap2[t2] = t;
6199       b2 += jmap2[t2 + 1] - jmap2[t2];
6200       t2++;
6201       t++;
6202     }
6203     i[r + 1] = t;
6204   }
6205   PetscFunctionReturn(PETSC_SUCCESS);
6206 }
6207 
6208 /*
6209   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6210 
6211   Input Parameters:
6212     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6213     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6214       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6215 
6216       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6217       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6218 
6219   Output Parameters:
6220     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6221     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6222       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6223       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6224 
6225     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6226       Atot: number of entries belonging to the diagonal block.
6227       Annz: number of unique nonzeros belonging to the diagonal block.
6228       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6229         repeats (i.e., same 'i,j' pair).
6230       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6231         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6232 
6233       Atot: number of entries belonging to the diagonal block
6234       Annz: number of unique nonzeros belonging to the diagonal block.
6235 
6236     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6237 
6238     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6239 */
6240 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6241 {
6242   PetscInt    cstart, cend, rstart, rend, row, col;
6243   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6244   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6245   PetscCount  k, m, p, q, r, s, mid;
6246   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6247 
6248   PetscFunctionBegin;
6249   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6250   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6251   m = rend - rstart;
6252 
6253   /* Skip negative rows */
6254   for (k = 0; k < n; k++)
6255     if (i[k] >= 0) break;
6256 
6257   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6258      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6259   */
6260   while (k < n) {
6261     row = i[k];
6262     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6263     for (s = k; s < n; s++)
6264       if (i[s] != row) break;
6265 
6266     /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
6267     for (p = k; p < s; p++) {
6268       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
6269       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6270     }
6271     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6272     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6273     rowBegin[row - rstart] = k;
6274     rowMid[row - rstart]   = mid;
6275     rowEnd[row - rstart]   = s;
6276 
6277     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6278     Atot += mid - k;
6279     Btot += s - mid;
6280 
6281     /* Count unique nonzeros of this diag row */
6282     for (p = k; p < mid;) {
6283       col = j[p];
6284       do {
6285         j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
6286         p++;
6287       } while (p < mid && j[p] == col);
6288       Annz++;
6289     }
6290 
6291     /* Count unique nonzeros of this offdiag row */
6292     for (p = mid; p < s;) {
6293       col = j[p];
6294       do {
6295         p++;
6296       } while (p < s && j[p] == col);
6297       Bnnz++;
6298     }
6299     k = s;
6300   }
6301 
6302   /* Allocation according to Atot, Btot, Annz, Bnnz */
6303   PetscCall(PetscMalloc1(Atot, &Aperm));
6304   PetscCall(PetscMalloc1(Btot, &Bperm));
6305   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6306   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6307 
6308   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6309   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6310   for (r = 0; r < m; r++) {
6311     k   = rowBegin[r];
6312     mid = rowMid[r];
6313     s   = rowEnd[r];
6314     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6315     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6316     Atot += mid - k;
6317     Btot += s - mid;
6318 
6319     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6320     for (p = k; p < mid;) {
6321       col = j[p];
6322       q   = p;
6323       do {
6324         p++;
6325       } while (p < mid && j[p] == col);
6326       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6327       Annz++;
6328     }
6329 
6330     for (p = mid; p < s;) {
6331       col = j[p];
6332       q   = p;
6333       do {
6334         p++;
6335       } while (p < s && j[p] == col);
6336       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6337       Bnnz++;
6338     }
6339   }
6340   /* Output */
6341   *Aperm_ = Aperm;
6342   *Annz_  = Annz;
6343   *Atot_  = Atot;
6344   *Ajmap_ = Ajmap;
6345   *Bperm_ = Bperm;
6346   *Bnnz_  = Bnnz;
6347   *Btot_  = Btot;
6348   *Bjmap_ = Bjmap;
6349   PetscFunctionReturn(PETSC_SUCCESS);
6350 }
6351 
6352 /*
6353   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6354 
6355   Input Parameters:
6356     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6357     nnz:  number of unique nonzeros in the merged matrix
6358     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6359     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6360 
6361   Output Parameter: (memory is allocated by the caller)
6362     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6363 
6364   Example:
6365     nnz1 = 4
6366     nnz  = 6
6367     imap = [1,3,4,5]
6368     jmap = [0,3,5,6,7]
6369    then,
6370     jmap_new = [0,0,3,3,5,6,7]
6371 */
6372 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6373 {
6374   PetscCount k, p;
6375 
6376   PetscFunctionBegin;
6377   jmap_new[0] = 0;
6378   p           = nnz;                /* p loops over jmap_new[] backwards */
6379   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6380     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6381   }
6382   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6383   PetscFunctionReturn(PETSC_SUCCESS);
6384 }
6385 
6386 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6387 {
6388   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6389 
6390   PetscFunctionBegin;
6391   PetscCall(PetscSFDestroy(&coo->sf));
6392   PetscCall(PetscFree(coo->Aperm1));
6393   PetscCall(PetscFree(coo->Bperm1));
6394   PetscCall(PetscFree(coo->Ajmap1));
6395   PetscCall(PetscFree(coo->Bjmap1));
6396   PetscCall(PetscFree(coo->Aimap2));
6397   PetscCall(PetscFree(coo->Bimap2));
6398   PetscCall(PetscFree(coo->Aperm2));
6399   PetscCall(PetscFree(coo->Bperm2));
6400   PetscCall(PetscFree(coo->Ajmap2));
6401   PetscCall(PetscFree(coo->Bjmap2));
6402   PetscCall(PetscFree(coo->Cperm1));
6403   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6404   PetscCall(PetscFree(coo));
6405   PetscFunctionReturn(PETSC_SUCCESS);
6406 }
6407 
6408 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6409 {
6410   MPI_Comm             comm;
6411   PetscMPIInt          rank, size;
6412   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6413   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6414   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6415   PetscContainer       container;
6416   MatCOOStruct_MPIAIJ *coo;
6417 
6418   PetscFunctionBegin;
6419   PetscCall(PetscFree(mpiaij->garray));
6420   PetscCall(VecDestroy(&mpiaij->lvec));
6421 #if defined(PETSC_USE_CTABLE)
6422   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6423 #else
6424   PetscCall(PetscFree(mpiaij->colmap));
6425 #endif
6426   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6427   mat->assembled     = PETSC_FALSE;
6428   mat->was_assembled = PETSC_FALSE;
6429 
6430   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6431   PetscCallMPI(MPI_Comm_size(comm, &size));
6432   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6433   PetscCall(PetscLayoutSetUp(mat->rmap));
6434   PetscCall(PetscLayoutSetUp(mat->cmap));
6435   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6436   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6437   PetscCall(MatGetLocalSize(mat, &m, &n));
6438   PetscCall(MatGetSize(mat, &M, &N));
6439 
6440   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6441   /* entries come first, then local rows, then remote rows.                     */
6442   PetscCount n1 = coo_n, *perm1;
6443   PetscInt  *i1 = coo_i, *j1 = coo_j;
6444 
6445   PetscCall(PetscMalloc1(n1, &perm1));
6446   for (k = 0; k < n1; k++) perm1[k] = k;
6447 
6448   /* Manipulate indices so that entries with negative row or col indices will have smallest
6449      row indices, local entries will have greater but negative row indices, and remote entries
6450      will have positive row indices.
6451   */
6452   for (k = 0; k < n1; k++) {
6453     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6454     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6455     else {
6456       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6457       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6458     }
6459   }
6460 
6461   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6462   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6463 
6464   /* Advance k to the first entry we need to take care of */
6465   for (k = 0; k < n1; k++)
6466     if (i1[k] > PETSC_MIN_INT) break;
6467   PetscInt i1start = k;
6468 
6469   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6470   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6471 
6472   /*           Send remote rows to their owner                                  */
6473   /* Find which rows should be sent to which remote ranks*/
6474   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6475   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6476   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6477   const PetscInt *ranges;
6478   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6479 
6480   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6481   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6482   for (k = rem; k < n1;) {
6483     PetscMPIInt owner;
6484     PetscInt    firstRow, lastRow;
6485 
6486     /* Locate a row range */
6487     firstRow = i1[k]; /* first row of this owner */
6488     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6489     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6490 
6491     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6492     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6493 
6494     /* All entries in [k,p) belong to this remote owner */
6495     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6496       PetscMPIInt *sendto2;
6497       PetscInt    *nentries2;
6498       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6499 
6500       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6501       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6502       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6503       PetscCall(PetscFree2(sendto, nentries2));
6504       sendto   = sendto2;
6505       nentries = nentries2;
6506       maxNsend = maxNsend2;
6507     }
6508     sendto[nsend]   = owner;
6509     nentries[nsend] = p - k;
6510     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6511     nsend++;
6512     k = p;
6513   }
6514 
6515   /* Build 1st SF to know offsets on remote to send data */
6516   PetscSF      sf1;
6517   PetscInt     nroots = 1, nroots2 = 0;
6518   PetscInt     nleaves = nsend, nleaves2 = 0;
6519   PetscInt    *offsets;
6520   PetscSFNode *iremote;
6521 
6522   PetscCall(PetscSFCreate(comm, &sf1));
6523   PetscCall(PetscMalloc1(nsend, &iremote));
6524   PetscCall(PetscMalloc1(nsend, &offsets));
6525   for (k = 0; k < nsend; k++) {
6526     iremote[k].rank  = sendto[k];
6527     iremote[k].index = 0;
6528     nleaves2 += nentries[k];
6529     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6530   }
6531   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6532   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6533   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6534   PetscCall(PetscSFDestroy(&sf1));
6535   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6536 
6537   /* Build 2nd SF to send remote COOs to their owner */
6538   PetscSF sf2;
6539   nroots  = nroots2;
6540   nleaves = nleaves2;
6541   PetscCall(PetscSFCreate(comm, &sf2));
6542   PetscCall(PetscSFSetFromOptions(sf2));
6543   PetscCall(PetscMalloc1(nleaves, &iremote));
6544   p = 0;
6545   for (k = 0; k < nsend; k++) {
6546     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6547     for (q = 0; q < nentries[k]; q++, p++) {
6548       iremote[p].rank  = sendto[k];
6549       iremote[p].index = offsets[k] + q;
6550     }
6551   }
6552   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6553 
6554   /* Send the remote COOs to their owner */
6555   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6556   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6557   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6558   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6559   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6560   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6561   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6562 
6563   PetscCall(PetscFree(offsets));
6564   PetscCall(PetscFree2(sendto, nentries));
6565 
6566   /* Sort received COOs by row along with the permutation array     */
6567   for (k = 0; k < n2; k++) perm2[k] = k;
6568   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6569 
6570   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6571   PetscCount *Cperm1;
6572   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6573   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves));
6574 
6575   /* Support for HYPRE matrices, kind of a hack.
6576      Swap min column with diagonal so that diagonal values will go first */
6577   PetscBool   hypre;
6578   const char *name;
6579   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6580   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6581   if (hypre) {
6582     PetscInt *minj;
6583     PetscBT   hasdiag;
6584 
6585     PetscCall(PetscBTCreate(m, &hasdiag));
6586     PetscCall(PetscMalloc1(m, &minj));
6587     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6588     for (k = i1start; k < rem; k++) {
6589       if (j1[k] < cstart || j1[k] >= cend) continue;
6590       const PetscInt rindex = i1[k] - rstart;
6591       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6592       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6593     }
6594     for (k = 0; k < n2; k++) {
6595       if (j2[k] < cstart || j2[k] >= cend) continue;
6596       const PetscInt rindex = i2[k] - rstart;
6597       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6598       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6599     }
6600     for (k = i1start; k < rem; k++) {
6601       const PetscInt rindex = i1[k] - rstart;
6602       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6603       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6604       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6605     }
6606     for (k = 0; k < n2; k++) {
6607       const PetscInt rindex = i2[k] - rstart;
6608       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6609       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6610       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6611     }
6612     PetscCall(PetscBTDestroy(&hasdiag));
6613     PetscCall(PetscFree(minj));
6614   }
6615 
6616   /* Split local COOs and received COOs into diag/offdiag portions */
6617   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6618   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6619   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6620   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6621   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6622   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6623 
6624   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6625   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6626   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6627   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6628 
6629   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6630   PetscInt *Ai, *Bi;
6631   PetscInt *Aj, *Bj;
6632 
6633   PetscCall(PetscMalloc1(m + 1, &Ai));
6634   PetscCall(PetscMalloc1(m + 1, &Bi));
6635   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6636   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6637 
6638   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6639   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6640   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6641   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6642   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6643 
6644   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6645   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6646 
6647   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6648   /* expect nonzeros in A/B most likely have local contributing entries        */
6649   PetscInt    Annz = Ai[m];
6650   PetscInt    Bnnz = Bi[m];
6651   PetscCount *Ajmap1_new, *Bjmap1_new;
6652 
6653   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6654   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6655 
6656   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6657   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6658 
6659   PetscCall(PetscFree(Aimap1));
6660   PetscCall(PetscFree(Ajmap1));
6661   PetscCall(PetscFree(Bimap1));
6662   PetscCall(PetscFree(Bjmap1));
6663   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6664   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6665   PetscCall(PetscFree(perm1));
6666   PetscCall(PetscFree3(i2, j2, perm2));
6667 
6668   Ajmap1 = Ajmap1_new;
6669   Bjmap1 = Bjmap1_new;
6670 
6671   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6672   if (Annz < Annz1 + Annz2) {
6673     PetscInt *Aj_new;
6674     PetscCall(PetscMalloc1(Annz, &Aj_new));
6675     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6676     PetscCall(PetscFree(Aj));
6677     Aj = Aj_new;
6678   }
6679 
6680   if (Bnnz < Bnnz1 + Bnnz2) {
6681     PetscInt *Bj_new;
6682     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6683     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6684     PetscCall(PetscFree(Bj));
6685     Bj = Bj_new;
6686   }
6687 
6688   /* Create new submatrices for on-process and off-process coupling                  */
6689   PetscScalar     *Aa, *Ba;
6690   MatType          rtype;
6691   Mat_SeqAIJ      *a, *b;
6692   PetscObjectState state;
6693   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6694   PetscCall(PetscCalloc1(Bnnz, &Ba));
6695   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6696   if (cstart) {
6697     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6698   }
6699 
6700   PetscCall(MatGetRootType_Private(mat, &rtype));
6701 
6702   MatSeqXAIJGetOptions_Private(mpiaij->A);
6703   PetscCall(MatDestroy(&mpiaij->A));
6704   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6705   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6706   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6707 
6708   MatSeqXAIJGetOptions_Private(mpiaij->B);
6709   PetscCall(MatDestroy(&mpiaij->B));
6710   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6711   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6712   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6713 
6714   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6715   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6716   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6717   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6718 
6719   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6720   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6721   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6722   a->free_a = b->free_a = PETSC_TRUE;
6723   a->free_ij = b->free_ij = PETSC_TRUE;
6724 
6725   /* conversion must happen AFTER multiply setup */
6726   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6727   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6728   PetscCall(VecDestroy(&mpiaij->lvec));
6729   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6730 
6731   // Put the COO struct in a container and then attach that to the matrix
6732   PetscCall(PetscMalloc1(1, &coo));
6733   coo->n       = coo_n;
6734   coo->sf      = sf2;
6735   coo->sendlen = nleaves;
6736   coo->recvlen = nroots;
6737   coo->Annz    = Annz;
6738   coo->Bnnz    = Bnnz;
6739   coo->Annz2   = Annz2;
6740   coo->Bnnz2   = Bnnz2;
6741   coo->Atot1   = Atot1;
6742   coo->Atot2   = Atot2;
6743   coo->Btot1   = Btot1;
6744   coo->Btot2   = Btot2;
6745   coo->Ajmap1  = Ajmap1;
6746   coo->Aperm1  = Aperm1;
6747   coo->Bjmap1  = Bjmap1;
6748   coo->Bperm1  = Bperm1;
6749   coo->Aimap2  = Aimap2;
6750   coo->Ajmap2  = Ajmap2;
6751   coo->Aperm2  = Aperm2;
6752   coo->Bimap2  = Bimap2;
6753   coo->Bjmap2  = Bjmap2;
6754   coo->Bperm2  = Bperm2;
6755   coo->Cperm1  = Cperm1;
6756   // Allocate in preallocation. If not used, it has zero cost on host
6757   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6758   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6759   PetscCall(PetscContainerSetPointer(container, coo));
6760   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6761   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6762   PetscCall(PetscContainerDestroy(&container));
6763   PetscFunctionReturn(PETSC_SUCCESS);
6764 }
6765 
6766 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6767 {
6768   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6769   Mat                  A = mpiaij->A, B = mpiaij->B;
6770   PetscScalar         *Aa, *Ba;
6771   PetscScalar         *sendbuf, *recvbuf;
6772   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6773   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6774   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6775   const PetscCount    *Cperm1;
6776   PetscContainer       container;
6777   MatCOOStruct_MPIAIJ *coo;
6778 
6779   PetscFunctionBegin;
6780   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6781   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6782   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6783   sendbuf = coo->sendbuf;
6784   recvbuf = coo->recvbuf;
6785   Ajmap1  = coo->Ajmap1;
6786   Ajmap2  = coo->Ajmap2;
6787   Aimap2  = coo->Aimap2;
6788   Bjmap1  = coo->Bjmap1;
6789   Bjmap2  = coo->Bjmap2;
6790   Bimap2  = coo->Bimap2;
6791   Aperm1  = coo->Aperm1;
6792   Aperm2  = coo->Aperm2;
6793   Bperm1  = coo->Bperm1;
6794   Bperm2  = coo->Bperm2;
6795   Cperm1  = coo->Cperm1;
6796 
6797   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6798   PetscCall(MatSeqAIJGetArray(B, &Ba));
6799 
6800   /* Pack entries to be sent to remote */
6801   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6802 
6803   /* Send remote entries to their owner and overlap the communication with local computation */
6804   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6805   /* Add local entries to A and B */
6806   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6807     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6808     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6809     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6810   }
6811   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6812     PetscScalar sum = 0.0;
6813     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6814     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6815   }
6816   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6817 
6818   /* Add received remote entries to A and B */
6819   for (PetscCount i = 0; i < coo->Annz2; i++) {
6820     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6821   }
6822   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6823     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6824   }
6825   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6826   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6827   PetscFunctionReturn(PETSC_SUCCESS);
6828 }
6829 
6830 /*MC
6831    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6832 
6833    Options Database Keys:
6834 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6835 
6836    Level: beginner
6837 
6838    Notes:
6839    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6840     in this case the values associated with the rows and columns one passes in are set to zero
6841     in the matrix
6842 
6843     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6844     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6845 
6846 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6847 M*/
6848 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6849 {
6850   Mat_MPIAIJ *b;
6851   PetscMPIInt size;
6852 
6853   PetscFunctionBegin;
6854   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6855 
6856   PetscCall(PetscNew(&b));
6857   B->data       = (void *)b;
6858   B->ops[0]     = MatOps_Values;
6859   B->assembled  = PETSC_FALSE;
6860   B->insertmode = NOT_SET_VALUES;
6861   b->size       = size;
6862 
6863   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6864 
6865   /* build cache for off array entries formed */
6866   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6867 
6868   b->donotstash  = PETSC_FALSE;
6869   b->colmap      = NULL;
6870   b->garray      = NULL;
6871   b->roworiented = PETSC_TRUE;
6872 
6873   /* stuff used for matrix vector multiply */
6874   b->lvec  = NULL;
6875   b->Mvctx = NULL;
6876 
6877   /* stuff for MatGetRow() */
6878   b->rowindices   = NULL;
6879   b->rowvalues    = NULL;
6880   b->getrowactive = PETSC_FALSE;
6881 
6882   /* flexible pointer used in CUSPARSE classes */
6883   b->spptr = NULL;
6884 
6885   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6886   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6887   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6888   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6889   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6890   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6891   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6892   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6893   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6894   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6895 #if defined(PETSC_HAVE_CUDA)
6896   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6897 #endif
6898 #if defined(PETSC_HAVE_HIP)
6899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6900 #endif
6901 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6903 #endif
6904 #if defined(PETSC_HAVE_MKL_SPARSE)
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6906 #endif
6907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6911 #if defined(PETSC_HAVE_ELEMENTAL)
6912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6913 #endif
6914 #if defined(PETSC_HAVE_SCALAPACK)
6915   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6916 #endif
6917   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6919 #if defined(PETSC_HAVE_HYPRE)
6920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6922 #endif
6923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6925   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6927   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6928   PetscFunctionReturn(PETSC_SUCCESS);
6929 }
6930 
6931 /*@C
6932   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6933   and "off-diagonal" part of the matrix in CSR format.
6934 
6935   Collective
6936 
6937   Input Parameters:
6938 + comm - MPI communicator
6939 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6940 . n    - This value should be the same as the local size used in creating the
6941          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6942          calculated if `N` is given) For square matrices `n` is almost always `m`.
6943 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6944 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6945 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6946 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6947 . a    - matrix values
6948 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6949 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6950 - oa   - matrix values
6951 
6952   Output Parameter:
6953 . mat - the matrix
6954 
6955   Level: advanced
6956 
6957   Notes:
6958   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6959   must free the arrays once the matrix has been destroyed and not before.
6960 
6961   The `i` and `j` indices are 0 based
6962 
6963   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6964 
6965   This sets local rows and cannot be used to set off-processor values.
6966 
6967   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6968   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6969   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6970   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6971   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6972   communication if it is known that only local entries will be set.
6973 
6974 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6975           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6976 @*/
6977 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6978 {
6979   Mat_MPIAIJ *maij;
6980 
6981   PetscFunctionBegin;
6982   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6983   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6984   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6985   PetscCall(MatCreate(comm, mat));
6986   PetscCall(MatSetSizes(*mat, m, n, M, N));
6987   PetscCall(MatSetType(*mat, MATMPIAIJ));
6988   maij = (Mat_MPIAIJ *)(*mat)->data;
6989 
6990   (*mat)->preallocated = PETSC_TRUE;
6991 
6992   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6993   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6994 
6995   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6996   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6997 
6998   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6999   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7000   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7001   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7002   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7003   PetscFunctionReturn(PETSC_SUCCESS);
7004 }
7005 
/* Context attached to C->product->data by MatProductSymbolic_MPIAIJBACKEND(); it is read by
   MatProductNumeric_MPIAIJBACKEND() and released by MatDestroy_MatMatMPIAIJBACKEND(). */
7006 typedef struct {
7007   Mat       *mp;    /* intermediate products (cp entries) */
7008   PetscBool *mptmp; /* is the intermediate product temporary ? Temporary products are skipped when values are copied into the COO buffers */
7009   PetscInt   cp;    /* number of intermediate products */
7010
7011   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth; all four fields are passed to that routine */
7012   PetscInt    *startsj_s, *startsj_r;
7013   PetscScalar *bufa;
7014   Mat          P_oth;
7015
7016   /* may take advantage of merging product->B */
7017   Mat Bloc; /* B-local by merging diag and off-diag (output of MatMPIAIJGetLocalMatMerge()) */
7018
7019   /* cusparse does not have support to split between symbolic and numeric phases.
7020      When api_user is true, we don't need to update the numerical values
7021      of the temporary storage.
        The flag is initialized from product->api_user in the symbolic phase; the numeric phase skips the
        refresh of P_oth/Bloc while it is set and then clears it. */
7022   PetscBool reusesym;
7023
7024   /* support for COO values insertion */
7025   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7026   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7027   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7028   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7029   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7030   PetscMemType mtype;         /* memory type handed to PetscSFMalloc()/PetscSFFree() for coo_v and coo_w */
7031
7032   /* customization */
7033   PetscBool abmerge;    /* for A*P: merge the local blocks of product->B into one matrix before multiplying (set from options) */
7034   PetscBool P_oth_bind; /* value passed to MatBindToCPU() for P_oth (set from options) */
7035 } MatMatMPIAIJBACKEND;
7036 
7037 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7038 {
7039   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7040   PetscInt             i;
7041 
7042   PetscFunctionBegin;
7043   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7044   PetscCall(PetscFree(mmdata->bufa));
7045   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7046   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7047   PetscCall(MatDestroy(&mmdata->P_oth));
7048   PetscCall(MatDestroy(&mmdata->Bloc));
7049   PetscCall(PetscSFDestroy(&mmdata->sf));
7050   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7051   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7052   PetscCall(PetscFree(mmdata->own[0]));
7053   PetscCall(PetscFree(mmdata->own));
7054   PetscCall(PetscFree(mmdata->off[0]));
7055   PetscCall(PetscFree(mmdata->off));
7056   PetscCall(PetscFree(mmdata));
7057   PetscFunctionReturn(PETSC_SUCCESS);
7058 }
7059 
7060 /* Copy selected n entries with indices in idx[] of A to v[].
7061    If idx is NULL, copy the whole data array of A to v[]
7062  */
7063 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7064 {
7065   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7066 
7067   PetscFunctionBegin;
7068   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7069   if (f) {
7070     PetscCall((*f)(A, n, idx, v));
7071   } else {
7072     const PetscScalar *vv;
7073 
7074     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7075     if (n && idx) {
7076       PetscScalar    *w  = v;
7077       const PetscInt *oi = idx;
7078       PetscInt        j;
7079 
7080       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7081     } else {
7082       PetscCall(PetscArraycpy(v, vv, n));
7083     }
7084     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7085   }
7086   PetscFunctionReturn(PETSC_SUCCESS);
7087 }
7088 
7089 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7090 {
7091   MatMatMPIAIJBACKEND *mmdata;
7092   PetscInt             i, n_d, n_o;
7093 
7094   PetscFunctionBegin;
7095   MatCheckProduct(C, 1);
7096   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7097   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7098   if (!mmdata->reusesym) { /* update temporary matrices */
7099     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7100     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7101   }
7102   mmdata->reusesym = PETSC_FALSE;
7103 
7104   for (i = 0; i < mmdata->cp; i++) {
7105     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7106     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7107   }
7108   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7109     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7110 
7111     if (mmdata->mptmp[i]) continue;
7112     if (noff) {
7113       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7114 
7115       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7116       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7117       n_o += noff;
7118       n_d += nown;
7119     } else {
7120       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7121 
7122       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7123       n_d += mm->nz;
7124     }
7125   }
7126   if (mmdata->hasoffproc) { /* offprocess insertion */
7127     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7128     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7129   }
7130   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7131   PetscFunctionReturn(PETSC_SUCCESS);
7132 }
7133 
7134 /* Support for Pt * A, A * P, or Pt * A * P

   MatProductSymbolic_MPIAIJBACKEND() is the symbolic phase for C = A*P (MATPRODUCT_AB),
   C = P^T*A (MATPRODUCT_AtB) and C = P^T*A*P (MATPRODUCT_PtAP). It:
     1. creates up to MAX_NUMBER_INTERMEDIATE intermediate products out of the diag/off-diag blocks of A and P,
        the merged local matrix (Bloc) and P_oth (as returned by MatGetBrowsOfAoCols_MPIAIJ()), and runs their
        symbolic phases;
     2. records, per intermediate product, how its local row/column indices translate to global ones
        (rmapt/cmapt map types and rmapa/cmapa tables);
     3. stores everything in a MatMatMPIAIJBACKEND context attached to C->product->data and installs
        MatProductNumeric_MPIAIJBACKEND() as the numeric phase;
     4. converts the nonzero positions of the non-temporary products to global COO (i,j) pairs, gathering the
        pairs destined for other processes through a PetscSF, and calls MatSetPreallocationCOO() on C.
   Any other product type raises PETSC_ERR_PLIB.
*/
7135 #define MAX_NUMBER_INTERMEDIATE 4
7136 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7137 {
7138   Mat_Product           *product = C->product;
7139   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7140   Mat_MPIAIJ            *a, *p;
7141   MatMatMPIAIJBACKEND   *mmdata;
7142   ISLocalToGlobalMapping P_oth_l2g = NULL;
7143   IS                     glob      = NULL;
7144   const char            *prefix;
7145   char                   pprefix[256];
7146   const PetscInt        *globidx, *P_oth_idx;
7147   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7148   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7149   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7150                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7151                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7152   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7153
7154   MatProductType ptype;
7155   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7156   PetscMPIInt    size;
7157
7158   PetscFunctionBegin;
7159   MatCheckProduct(C, 1);
7160   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7161   ptype = product->type;
       /* when A is flagged symmetric, A^T * B is handled as A * B; the fact is recorded on the product */
7162   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7163     ptype                                          = MATPRODUCT_AB;
7164     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7165   }
       /* pick the roles of A and P and the local/global sizes of C for the requested product */
7166   switch (ptype) {
7167   case MATPRODUCT_AB:
7168     A          = product->A;
7169     P          = product->B;
7170     m          = A->rmap->n;
7171     n          = P->cmap->n;
7172     M          = A->rmap->N;
7173     N          = P->cmap->N;
7174     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7175     break;
7176   case MATPRODUCT_AtB:
7177     P          = product->A;
7178     A          = product->B;
7179     m          = P->cmap->n;
7180     n          = A->cmap->n;
7181     M          = P->cmap->N;
7182     N          = A->cmap->N;
7183     hasoffproc = PETSC_TRUE;
7184     break;
7185   case MATPRODUCT_PtAP:
7186     A          = product->A;
7187     P          = product->B;
7188     m          = P->cmap->n;
7189     n          = P->cmap->n;
7190     M          = P->cmap->N;
7191     N          = P->cmap->N;
7192     hasoffproc = PETSC_TRUE;
7193     break;
7194   default:
7195     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7196   }
7197   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7198   if (size == 1) hasoffproc = PETSC_FALSE; /* a single process has no other process to send values to */
7199
7200   /* defaults */
7201   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7202     mp[i]    = NULL;
7203     mptmp[i] = PETSC_FALSE;
7204     rmapt[i] = -1;
7205     cmapt[i] = -1;
7206     rmapa[i] = NULL;
7207     cmapa[i] = NULL;
7208   }
7209
7210   /* customization */
7211   PetscCall(PetscNew(&mmdata));
7212   mmdata->reusesym = product->api_user; /* while set, the numeric phase skips the refresh of P_oth/Bloc (it clears the flag afterwards) */
       /* the option names depend on whether the user went through the MatMatMult()/MatPtAP() style API (api_user) or MatProduct */
7213   if (ptype == MATPRODUCT_AB) {
7214     if (product->api_user) {
7215       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7216       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7217       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7218       PetscOptionsEnd();
7219     } else {
7220       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7221       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7222       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7223       PetscOptionsEnd();
7224     }
7225   } else if (ptype == MATPRODUCT_PtAP) {
7226     if (product->api_user) {
7227       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7228       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7229       PetscOptionsEnd();
7230     } else {
7231       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7232       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7233       PetscOptionsEnd();
7234     }
7235   }
7236   a = (Mat_MPIAIJ *)A->data;
7237   p = (Mat_MPIAIJ *)P->data;
       /* C gets the sizes chosen above and the same type as A */
7238   PetscCall(MatSetSizes(C, m, n, M, N));
7239   PetscCall(PetscLayoutSetUp(C->rmap));
7240   PetscCall(PetscLayoutSetUp(C->cmap));
7241   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7242   PetscCall(MatGetOptionsPrefix(C, &prefix));
7243
       /* Create the intermediate products. Each one gets the options prefix of C followed by "backend_p<index>_",
          inherits api_user, and has its symbolic phase executed right away. */
7244   cp = 0;
7245   switch (ptype) {
7246   case MATPRODUCT_AB: /* A * P */
7247     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7248
7249     /* A_diag * P_local (merged or not) */
7250     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7251       /* P is product->B */
7252       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7253       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7254       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7255       PetscCall(MatProductSetFill(mp[cp], product->fill));
7256       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7257       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7258       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7259       mp[cp]->product->api_user = product->api_user;
7260       PetscCall(MatProductSetFromOptions(mp[cp]));
7261       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7262       PetscCall(ISGetIndices(glob, &globidx));
7263       rmapt[cp] = 1;
7264       cmapt[cp] = 2;
7265       cmapa[cp] = globidx;
7266       mptmp[cp] = PETSC_FALSE;
7267       cp++;
7268     } else { /* A_diag * P_diag and A_diag * P_off */
7269       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7270       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7271       PetscCall(MatProductSetFill(mp[cp], product->fill));
7272       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7273       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7274       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7275       mp[cp]->product->api_user = product->api_user;
7276       PetscCall(MatProductSetFromOptions(mp[cp]));
7277       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7278       rmapt[cp] = 1;
7279       cmapt[cp] = 1;
7280       mptmp[cp] = PETSC_FALSE;
7281       cp++;
7282       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7283       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7284       PetscCall(MatProductSetFill(mp[cp], product->fill));
7285       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7286       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7287       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7288       mp[cp]->product->api_user = product->api_user;
7289       PetscCall(MatProductSetFromOptions(mp[cp]));
7290       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7291       rmapt[cp] = 1;
7292       cmapt[cp] = 2;
7293       cmapa[cp] = p->garray;
7294       mptmp[cp] = PETSC_FALSE;
7295       cp++;
7296     }
7297
7298     /* A_off * P_other */
7299     if (mmdata->P_oth) {
7300       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7301       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7302       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7303       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7304       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7305       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7306       PetscCall(MatProductSetFill(mp[cp], product->fill));
7307       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7308       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7309       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7310       mp[cp]->product->api_user = product->api_user;
7311       PetscCall(MatProductSetFromOptions(mp[cp]));
7312       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7313       rmapt[cp] = 1;
7314       cmapt[cp] = 2;
7315       cmapa[cp] = P_oth_idx;
7316       mptmp[cp] = PETSC_FALSE;
7317       cp++;
7318     }
7319     break;
7320
7321   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7322     /* A is product->B */
7323     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7324     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7325       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7326       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7327       PetscCall(MatProductSetFill(mp[cp], product->fill));
7328       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7329       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7330       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7331       mp[cp]->product->api_user = product->api_user;
7332       PetscCall(MatProductSetFromOptions(mp[cp]));
7333       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7334       PetscCall(ISGetIndices(glob, &globidx));
7335       rmapt[cp] = 2;
7336       rmapa[cp] = globidx;
7337       cmapt[cp] = 2;
7338       cmapa[cp] = globidx;
7339       mptmp[cp] = PETSC_FALSE;
7340       cp++;
7341     } else {
7342       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7343       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7344       PetscCall(MatProductSetFill(mp[cp], product->fill));
7345       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7346       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7347       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7348       mp[cp]->product->api_user = product->api_user;
7349       PetscCall(MatProductSetFromOptions(mp[cp]));
7350       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7351       PetscCall(ISGetIndices(glob, &globidx));
7352       rmapt[cp] = 1;
7353       cmapt[cp] = 2;
7354       cmapa[cp] = globidx;
7355       mptmp[cp] = PETSC_FALSE;
7356       cp++;
7357       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7358       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7359       PetscCall(MatProductSetFill(mp[cp], product->fill));
7360       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7361       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7362       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7363       mp[cp]->product->api_user = product->api_user;
7364       PetscCall(MatProductSetFromOptions(mp[cp]));
7365       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7366       rmapt[cp] = 2;
7367       rmapa[cp] = p->garray;
7368       cmapt[cp] = 2;
7369       cmapa[cp] = globidx;
7370       mptmp[cp] = PETSC_FALSE;
7371       cp++;
7372     }
7373     break;
7374   case MATPRODUCT_PtAP:
7375     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7376     /* P is product->B */
7377     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
         /* mp[0]: PtAP of A_diag with the merged local P */
7378     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7379     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7380     PetscCall(MatProductSetFill(mp[cp], product->fill));
7381     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7382     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7383     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7384     mp[cp]->product->api_user = product->api_user;
7385     PetscCall(MatProductSetFromOptions(mp[cp]));
7386     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7387     PetscCall(ISGetIndices(glob, &globidx));
7388     rmapt[cp] = 2;
7389     rmapa[cp] = globidx;
7390     cmapt[cp] = 2;
7391     cmapa[cp] = globidx;
7392     mptmp[cp] = PETSC_FALSE;
7393     cp++;
7394     if (mmdata->P_oth) {
7395       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7396       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7397       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7398       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
           /* mp[1]: A_off * P_oth */
7399       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7400       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7401       PetscCall(MatProductSetFill(mp[cp], product->fill));
7402       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7403       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7404       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7405       mp[cp]->product->api_user = product->api_user;
7406       PetscCall(MatProductSetFromOptions(mp[cp]));
7407       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7408       mptmp[cp] = PETSC_TRUE; /* only an operand of the next product: its values are not inserted into C, so no maps are recorded */
7409       cp++;
           /* mp[2]: P_loc^T * (A_off * P_oth); mp[1] is the product created just above (cp was 1 there) */
7410       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7411       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7412       PetscCall(MatProductSetFill(mp[cp], product->fill));
7413       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7414       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7415       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7416       mp[cp]->product->api_user = product->api_user;
7417       PetscCall(MatProductSetFromOptions(mp[cp]));
7418       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7419       rmapt[cp] = 2;
7420       rmapa[cp] = globidx;
7421       cmapt[cp] = 2;
7422       cmapa[cp] = P_oth_idx;
7423       mptmp[cp] = PETSC_FALSE;
7424       cp++;
7425     }
7426     break;
7427   default:
7428     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7429   }
7430   /* sanity check: a sparse (type-2) row map is only expected when the product inserts off-process values */
7431   if (size > 1)
7432     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7433
       /* move the intermediate products into the context and attach it (with its destructor and numeric phase) to C */
7434   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7435   for (i = 0; i < cp; i++) {
7436     mmdata->mp[i]    = mp[i];
7437     mmdata->mptmp[i] = mptmp[i];
7438   }
7439   mmdata->cp             = cp;
7440   C->product->data       = mmdata;
7441   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7442   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7443
7444   /* memory type: host unless the type of C matches one of the CUSPARSE/HIPSPARSE/KOKKOS AIJ types */
7445   mmdata->mtype = PETSC_MEMTYPE_HOST;
7446   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7447   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7448   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7449   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7450   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7451   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7452
7453   /* prepare coo coordinates for values insertion */
7454
7455   /* count total nonzeros of those intermediate seqaij Mats
7456     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7457     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7458     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7459   */
7460   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7461     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7462     if (mptmp[cp]) continue;
7463     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7464       const PetscInt *rmap = rmapa[cp];
7465       const PetscInt  mr   = mp[cp]->rmap->n;
7466       const PetscInt  rs   = C->rmap->rstart;
7467       const PetscInt  re   = C->rmap->rend;
7468       const PetscInt *ii   = mm->i;
7469       for (i = 0; i < mr; i++) {
7470         const PetscInt gr = rmap[i];
7471         const PetscInt nz = ii[i + 1] - ii[i];
7472         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7473         else ncoo_oown += nz;                  /* this row is local */
7474       }
7475     } else ncoo_d += mm->nz;
7476   }
7477
7478   /*
7479     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7480
7481     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7482
7483     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7484
7485     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7486     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7487     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7488
7489     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7490     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7491   */
7492   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7493   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7494
7495   /* gather (i,j) of nonzeros inserted by remote procs */
7496   if (hasoffproc) {
7497     PetscSF  msf;
7498     PetscInt ncoo2, *coo_i2, *coo_j2;
7499
7500     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7501     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7502     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7503
         /* ncoo_o is reset and recounted here; it is used as the running write offset into coo_i/coo_j */
7504     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7505       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7506       PetscInt   *idxoff = mmdata->off[cp];
7507       PetscInt   *idxown = mmdata->own[cp];
7508       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7509         const PetscInt *rmap = rmapa[cp];
7510         const PetscInt *cmap = cmapa[cp];
7511         const PetscInt *ii   = mm->i;
7512         PetscInt       *coi  = coo_i + ncoo_o;
7513         PetscInt       *coj  = coo_j + ncoo_o;
7514         const PetscInt  mr   = mp[cp]->rmap->n;
7515         const PetscInt  rs   = C->rmap->rstart;
7516         const PetscInt  re   = C->rmap->rend;
7517         const PetscInt  cs   = C->cmap->rstart;
7518         for (i = 0; i < mr; i++) {
7519           const PetscInt *jj = mm->j + ii[i];
7520           const PetscInt  gr = rmap[i];
7521           const PetscInt  nz = ii[i + 1] - ii[i];
7522           if (gr < rs || gr >= re) { /* this is an offproc row */
                 /* record the global row for sending, and the position of each value inside mp[cp]'s value array */
7523             for (j = ii[i]; j < ii[i + 1]; j++) {
7524               *coi++    = gr;
7525               *idxoff++ = j;
7526             }
7527             if (!cmapt[cp]) { /* already global */
7528               for (j = 0; j < nz; j++) *coj++ = jj[j];
7529             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7530               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7531             } else { /* offdiag */
7532               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7533             }
7534             ncoo_o += nz;
7535           } else { /* this is a local row */
7536             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7537           }
7538         }
7539       }
           /* the end of this matrix's segment is the start of the next one (csr-like) */
7540       mmdata->off[cp + 1] = idxoff;
7541       mmdata->own[cp + 1] = idxown;
7542     }
7543
         /* SF with one leaf per off-process entry; coo_i supplies the global row of each leaf, located through the row layout of C */
7544     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7545     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7546     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7547     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7548     ncoo = ncoo_d + ncoo_oown + ncoo2;
7549     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7550     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7551     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7552     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7553     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7554     PetscCall(PetscFree2(coo_i, coo_j));
7555     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7556     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
         /* from here on coo_i/coo_j refer to the full-size arrays; their front part is filled by the loop below */
7557     coo_i = coo_i2;
7558     coo_j = coo_j2;
7559   } else { /* no offproc values insertion */
7560     ncoo = ncoo_d;
7561     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7562
         /* empty-graph SF: still needed because coo_v is allocated through PetscSFMalloc() below */
7563     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7564     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7565     PetscCall(PetscSFSetUp(mmdata->sf));
7566   }
7567   mmdata->hasoffproc = hasoffproc;
7568
7569   /* gather (i,j) of nonzeros inserted locally */
       /* ncoo_d is reset and reused as the running write offset into coo_i/coo_j */
7570   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7571     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7572     PetscInt       *coi  = coo_i + ncoo_d;
7573     PetscInt       *coj  = coo_j + ncoo_d;
7574     const PetscInt *jj   = mm->j;
7575     const PetscInt *ii   = mm->i;
7576     const PetscInt *cmap = cmapa[cp];
7577     const PetscInt *rmap = rmapa[cp];
7578     const PetscInt  mr   = mp[cp]->rmap->n;
7579     const PetscInt  rs   = C->rmap->rstart;
7580     const PetscInt  re   = C->rmap->rend;
7581     const PetscInt  cs   = C->cmap->rstart;
7582
7583     if (mptmp[cp]) continue;
7584     if (rmapt[cp] == 1) { /* consecutive rows */
7585       /* fill coo_i */
7586       for (i = 0; i < mr; i++) {
7587         const PetscInt gr = i + rs;
7588         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7589       }
7590       /* fill coo_j */
7591       if (!cmapt[cp]) { /* type-0, already global */
7592         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7593       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7594         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7595       } else {                                            /* type-2, local to global for sparse columns */
7596         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7597       }
7598       ncoo_d += mm->nz;
7599     } else if (rmapt[cp] == 2) { /* sparse rows */
7600       for (i = 0; i < mr; i++) {
7601         const PetscInt *jj = mm->j + ii[i];
7602         const PetscInt  gr = rmap[i];
7603         const PetscInt  nz = ii[i + 1] - ii[i];
7604         if (gr >= rs && gr < re) { /* local rows */
7605           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7606           if (!cmapt[cp]) { /* type-0, already global */
7607             for (j = 0; j < nz; j++) *coj++ = jj[j];
7608           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7609             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7610           } else { /* type-2, local to global for sparse columns */
7611             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7612           }
7613           ncoo_d += nz;
7614         }
7615       }
7616     }
7617   }
       /* the local-to-global tables are no longer needed once all (i,j) pairs have been generated */
7618   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7619   PetscCall(ISDestroy(&glob));
7620   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7621   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7622   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7623   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7624
7625   /* preallocate with COO data */
7626   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7627   PetscCall(PetscFree2(coo_i, coo_j));
7628   PetscFunctionReturn(PETSC_SUCCESS);
7629 }
7630 
7631 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7632 {
7633   Mat_Product *product = mat->product;
7634 #if defined(PETSC_HAVE_DEVICE)
7635   PetscBool match  = PETSC_FALSE;
7636   PetscBool usecpu = PETSC_FALSE;
7637 #else
7638   PetscBool match = PETSC_TRUE;
7639 #endif
7640 
7641   PetscFunctionBegin;
7642   MatCheckProduct(mat, 1);
7643 #if defined(PETSC_HAVE_DEVICE)
7644   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7645   if (match) { /* we can always fallback to the CPU if requested */
7646     switch (product->type) {
7647     case MATPRODUCT_AB:
7648       if (product->api_user) {
7649         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7650         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7651         PetscOptionsEnd();
7652       } else {
7653         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7654         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7655         PetscOptionsEnd();
7656       }
7657       break;
7658     case MATPRODUCT_AtB:
7659       if (product->api_user) {
7660         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7661         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7662         PetscOptionsEnd();
7663       } else {
7664         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7665         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7666         PetscOptionsEnd();
7667       }
7668       break;
7669     case MATPRODUCT_PtAP:
7670       if (product->api_user) {
7671         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7672         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7673         PetscOptionsEnd();
7674       } else {
7675         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7676         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7677         PetscOptionsEnd();
7678       }
7679       break;
7680     default:
7681       break;
7682     }
7683     match = (PetscBool)!usecpu;
7684   }
7685 #endif
7686   if (match) {
7687     switch (product->type) {
7688     case MATPRODUCT_AB:
7689     case MATPRODUCT_AtB:
7690     case MATPRODUCT_PtAP:
7691       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7692       break;
7693     default:
7694       break;
7695     }
7696   }
7697   /* fallback to MPIAIJ ops */
7698   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7699   PetscFunctionReturn(PETSC_SUCCESS);
7700 }
7701 
7702 /*
7703    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7704 
7705    n - the number of block indices in cc[]
7706    cc - the block indices (must be large enough to contain the indices)
7707 */
7708 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7709 {
7710   PetscInt        cnt = -1, nidx, j;
7711   const PetscInt *idx;
7712 
7713   PetscFunctionBegin;
7714   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7715   if (nidx) {
7716     cnt     = 0;
7717     cc[cnt] = idx[0] / bs;
7718     for (j = 1; j < nidx; j++) {
7719       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7720     }
7721   }
7722   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7723   *n = cnt + 1;
7724   PetscFunctionReturn(PETSC_SUCCESS);
7725 }
7726 
7727 /*
7728     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7729 
7730     ncollapsed - the number of block indices
7731     collapsed - the block indices (must be large enough to contain the indices)
7732 */
7733 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7734 {
7735   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7736 
7737   PetscFunctionBegin;
7738   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7739   for (i = start + 1; i < start + bs; i++) {
7740     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7741     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7742     cprevtmp = cprev;
7743     cprev    = merged;
7744     merged   = cprevtmp;
7745   }
7746   *ncollapsed = nprev;
7747   if (collapsed) *collapsed = cprev;
7748   PetscFunctionReturn(PETSC_SUCCESS);
7749 }
7750 
7751 /*
7752  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7753 
7754  Input Parameter:
7755  . Amat - matrix
7756  - symmetrize - make the result symmetric
7757  + scale - scale with diagonal
7758 
7759  Output Parameter:
7760  . a_Gmat - output scalar graph >= 0
7761 
7762 */
7763 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7764 {
7765   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7766   MPI_Comm  comm;
7767   Mat       Gmat;
7768   PetscBool ismpiaij, isseqaij;
7769   Mat       a, b, c;
7770   MatType   jtype;
7771 
7772   PetscFunctionBegin;
7773   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7774   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7775   PetscCall(MatGetSize(Amat, &MM, &NN));
7776   PetscCall(MatGetBlockSize(Amat, &bs));
7777   nloc = (Iend - Istart) / bs;
7778 
7779   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7780   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7781   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7782 
7783   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7784   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7785      implementation */
7786   if (bs > 1) {
7787     PetscCall(MatGetType(Amat, &jtype));
7788     PetscCall(MatCreate(comm, &Gmat));
7789     PetscCall(MatSetType(Gmat, jtype));
7790     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7791     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7792     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7793       PetscInt  *d_nnz, *o_nnz;
7794       MatScalar *aa, val, *AA;
7795       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7796       if (isseqaij) {
7797         a = Amat;
7798         b = NULL;
7799       } else {
7800         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7801         a             = d->A;
7802         b             = d->B;
7803       }
7804       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7805       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7806       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7807         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7808         const PetscInt *cols1, *cols2;
7809         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7810           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7811           nnz[brow / bs] = nc2 / bs;
7812           if (nc2 % bs) ok = 0;
7813           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7814           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7815             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7816             if (nc1 != nc2) ok = 0;
7817             else {
7818               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7819                 if (cols1[jj] != cols2[jj]) ok = 0;
7820                 if (cols1[jj] % bs != jj % bs) ok = 0;
7821               }
7822             }
7823             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7824           }
7825           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7826           if (!ok) {
7827             PetscCall(PetscFree2(d_nnz, o_nnz));
7828             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7829             goto old_bs;
7830           }
7831         }
7832       }
7833       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7834       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7835       PetscCall(PetscFree2(d_nnz, o_nnz));
7836       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7837       // diag
7838       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7839         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7840         ai               = aseq->i;
7841         n                = ai[brow + 1] - ai[brow];
7842         aj               = aseq->j + ai[brow];
7843         for (int k = 0; k < n; k += bs) {        // block columns
7844           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7845           val        = 0;
7846           if (index_size == 0) {
7847             for (int ii = 0; ii < bs; ii++) { // rows in block
7848               aa = aseq->a + ai[brow + ii] + k;
7849               for (int jj = 0; jj < bs; jj++) {         // columns in block
7850                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7851               }
7852             }
7853           } else {                                       // use (index,index) value if provided
7854             for (int iii = 0; iii < index_size; iii++) { // rows in block
7855               int ii = index[iii];
7856               aa     = aseq->a + ai[brow + ii] + k;
7857               for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
7858                 int jj = index[jjj];
7859                 val += PetscAbs(PetscRealPart(aa[jj]));
7860               }
7861             }
7862           }
7863           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7864           AA[k / bs] = val;
7865         }
7866         grow = Istart / bs + brow / bs;
7867         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7868       }
7869       // off-diag
7870       if (ismpiaij) {
7871         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7872         const PetscScalar *vals;
7873         const PetscInt    *cols, *garray = aij->garray;
7874         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7875         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7876           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7877           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7878             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7879             AA[k / bs] = 0;
7880             AJ[cidx]   = garray[cols[k]] / bs;
7881           }
7882           nc = ncols / bs;
7883           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7884           if (index_size == 0) {
7885             for (int ii = 0; ii < bs; ii++) { // rows in block
7886               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7887               for (int k = 0; k < ncols; k += bs) {
7888                 for (int jj = 0; jj < bs; jj++) { // cols in block
7889                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7890                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7891                 }
7892               }
7893               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7894             }
7895           } else {                                       // use (index,index) value if provided
7896             for (int iii = 0; iii < index_size; iii++) { // rows in block
7897               int ii = index[iii];
7898               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7899               for (int k = 0; k < ncols; k += bs) {
7900                 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
7901                   int jj = index[jjj];
7902                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7903                 }
7904               }
7905               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7906             }
7907           }
7908           grow = Istart / bs + brow / bs;
7909           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7910         }
7911       }
7912       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7913       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7914       PetscCall(PetscFree2(AA, AJ));
7915     } else {
7916       const PetscScalar *vals;
7917       const PetscInt    *idx;
7918       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7919     old_bs:
7920       /*
7921        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7922        */
7923       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7924       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7925       if (isseqaij) {
7926         PetscInt max_d_nnz;
7927         /*
7928          Determine exact preallocation count for (sequential) scalar matrix
7929          */
7930         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7931         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7932         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7933         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7934         PetscCall(PetscFree3(w0, w1, w2));
7935       } else if (ismpiaij) {
7936         Mat             Daij, Oaij;
7937         const PetscInt *garray;
7938         PetscInt        max_d_nnz;
7939         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7940         /*
7941          Determine exact preallocation count for diagonal block portion of scalar matrix
7942          */
7943         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7944         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7945         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7946         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7947         PetscCall(PetscFree3(w0, w1, w2));
7948         /*
7949          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7950          */
7951         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7952           o_nnz[jj] = 0;
7953           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7954             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7955             o_nnz[jj] += ncols;
7956             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7957           }
7958           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7959         }
7960       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7961       /* get scalar copy (norms) of matrix */
7962       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7963       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7964       PetscCall(PetscFree2(d_nnz, o_nnz));
7965       for (Ii = Istart; Ii < Iend; Ii++) {
7966         PetscInt dest_row = Ii / bs;
7967         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7968         for (jj = 0; jj < ncols; jj++) {
7969           PetscInt    dest_col = idx[jj] / bs;
7970           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7971           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7972         }
7973         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7974       }
7975       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7976       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7977     }
7978   } else {
7979     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7980     else {
7981       Gmat = Amat;
7982       PetscCall(PetscObjectReference((PetscObject)Gmat));
7983     }
7984     if (isseqaij) {
7985       a = Gmat;
7986       b = NULL;
7987     } else {
7988       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7989       a             = d->A;
7990       b             = d->B;
7991     }
7992     if (filter >= 0 || scale) {
7993       /* take absolute value of each entry */
7994       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7995         MatInfo      info;
7996         PetscScalar *avals;
7997         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7998         PetscCall(MatSeqAIJGetArray(c, &avals));
7999         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8000         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8001       }
8002     }
8003   }
8004   if (symmetrize) {
8005     PetscBool isset, issym;
8006     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8007     if (!isset || !issym) {
8008       Mat matTrans;
8009       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8010       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8011       PetscCall(MatDestroy(&matTrans));
8012     }
8013     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8014   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8015   if (scale) {
8016     /* scale c for all diagonal values = 1 or -1 */
8017     Vec diag;
8018     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8019     PetscCall(MatGetDiagonal(Gmat, diag));
8020     PetscCall(VecReciprocal(diag));
8021     PetscCall(VecSqrtAbs(diag));
8022     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8023     PetscCall(VecDestroy(&diag));
8024   }
8025   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8026 
8027   if (filter >= 0) {
8028     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8029     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8030   }
8031   *a_Gmat = Gmat;
8032   PetscFunctionReturn(PETSC_SUCCESS);
8033 }
8034 
8035 /*
8036     Special version for direct calls from Fortran
8037 */
8038 #include <petsc/private/fortranimpl.h>
8039 
8040 /* Change these macros so can be used in void function */
8041 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8042 #undef PetscCall
8043 #define PetscCall(...) \
8044   do { \
8045     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8046     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8047       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8048       return; \
8049     } \
8050   } while (0)
8051 
8052 #undef SETERRQ
8053 #define SETERRQ(comm, ierr, ...) \
8054   do { \
8055     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8056     return; \
8057   } while (0)
8058 
8059 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8060   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8061 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8062   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8063 #else
8064 #endif
8065 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8066 {
8067   Mat         mat = *mmat;
8068   PetscInt    m = *mm, n = *mn;
8069   InsertMode  addv = *maddv;
8070   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8071   PetscScalar value;
8072 
8073   MatCheckPreallocated(mat, 1);
8074   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8075   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8076   {
8077     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8078     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8079     PetscBool roworiented = aij->roworiented;
8080 
8081     /* Some Variables required in the macro */
8082     Mat         A     = aij->A;
8083     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8084     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8085     MatScalar  *aa;
8086     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8087     Mat         B                 = aij->B;
8088     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8089     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8090     MatScalar  *ba;
8091     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8092      * cannot use "#if defined" inside a macro. */
8093     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8094 
8095     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8096     PetscInt   nonew = a->nonew;
8097     MatScalar *ap1, *ap2;
8098 
8099     PetscFunctionBegin;
8100     PetscCall(MatSeqAIJGetArray(A, &aa));
8101     PetscCall(MatSeqAIJGetArray(B, &ba));
8102     for (i = 0; i < m; i++) {
8103       if (im[i] < 0) continue;
8104       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8105       if (im[i] >= rstart && im[i] < rend) {
8106         row      = im[i] - rstart;
8107         lastcol1 = -1;
8108         rp1      = aj + ai[row];
8109         ap1      = aa + ai[row];
8110         rmax1    = aimax[row];
8111         nrow1    = ailen[row];
8112         low1     = 0;
8113         high1    = nrow1;
8114         lastcol2 = -1;
8115         rp2      = bj + bi[row];
8116         ap2      = ba + bi[row];
8117         rmax2    = bimax[row];
8118         nrow2    = bilen[row];
8119         low2     = 0;
8120         high2    = nrow2;
8121 
8122         for (j = 0; j < n; j++) {
8123           if (roworiented) value = v[i * n + j];
8124           else value = v[i + j * m];
8125           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8126           if (in[j] >= cstart && in[j] < cend) {
8127             col = in[j] - cstart;
8128             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8129           } else if (in[j] < 0) continue;
8130           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8131             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8132           } else {
8133             if (mat->was_assembled) {
8134               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8135 #if defined(PETSC_USE_CTABLE)
8136               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8137               col--;
8138 #else
8139               col = aij->colmap[in[j]] - 1;
8140 #endif
8141               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8142                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8143                 col = in[j];
8144                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8145                 B        = aij->B;
8146                 b        = (Mat_SeqAIJ *)B->data;
8147                 bimax    = b->imax;
8148                 bi       = b->i;
8149                 bilen    = b->ilen;
8150                 bj       = b->j;
8151                 rp2      = bj + bi[row];
8152                 ap2      = ba + bi[row];
8153                 rmax2    = bimax[row];
8154                 nrow2    = bilen[row];
8155                 low2     = 0;
8156                 high2    = nrow2;
8157                 bm       = aij->B->rmap->n;
8158                 ba       = b->a;
8159                 inserted = PETSC_FALSE;
8160               }
8161             } else col = in[j];
8162             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8163           }
8164         }
8165       } else if (!aij->donotstash) {
8166         if (roworiented) {
8167           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8168         } else {
8169           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8170         }
8171       }
8172     }
8173     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8174     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8175   }
8176   PetscFunctionReturnVoid();
8177 }
8178 
8179 /* Undefining these here since they were redefined from their original definition above! No
8180  * other PETSc functions should be defined past this point, as it is impossible to recover the
8181  * original definitions */
8182 #undef PetscCall
8183 #undef SETERRQ
8184