xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6e415bd26dfd0d33b8d7233bdee8a51de4844a26)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
115    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118    for communicators controlling multiple processes.  It is recommended that you call both of
119    the above preallocation routines for simplicity.
120
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123
124    Level: beginner
125
126    Developer Note:
127    Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically switches over to use inodes when
128    enough exist.
129
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
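
/*
  A minimal usage sketch for the manual page above (illustrative only, not compiled here); the matrix sizes and the
  per-row nonzero estimates (10 on-diagonal, 5 off-diagonal) are placeholder values chosen for the example:

    Mat A;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatSeqAIJSetPreallocation(A, 10, NULL));          // used when the communicator has a single process
    PetscCall(MatMPIAIJSetPreallocation(A, 10, NULL, 5, NULL)); // used when the communicator has multiple processes
*/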
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
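
/*
  Illustrative sketch (not compiled here) of selecting this format at runtime via the option above: with
  MatSetFromOptions() in the code, running with -mat_type aijcrl switches the matrix to the AIJCRL format.

    Mat A;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
    PetscCall(MatSetFromOptions(A)); // picks up -mat_type aijcrl from the options database
*/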
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
280 {
281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
282   PetscInt           i, m, n, *garray = aij->garray;
283   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
284   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
285   PetscReal         *work;
286   const PetscScalar *dummy;
287   PetscMPIInt        in;
288 
289   PetscFunctionBegin;
290   PetscCall(MatGetSize(A, &m, &n));
291   PetscCall(PetscCalloc1(n, &work));
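  /* Get and immediately restore the read-only arrays; the values are not used here. Presumably this is done so that
     any device-resident copies of aij->A and aij->B are synchronized to the host before a_aij->a and b_aij->a are
     read directly in the loops below. */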
292   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
294   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
295   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
296   if (type == NORM_2) {
297     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
298     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
299   } else if (type == NORM_1) {
300     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
301     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
302   } else if (type == NORM_INFINITY) {
303     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
304     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
305   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
306     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
307     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
308   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
309     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
310     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
311   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
312   PetscCall(PetscMPIIntCast(n, &in));
313   if (type == NORM_INFINITY) {
314     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
315   } else {
316     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
317   }
318   PetscCall(PetscFree(work));
319   if (type == NORM_2) {
320     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
321   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
322     for (i = 0; i < n; i++) reductions[i] /= m;
323   }
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
326 
327 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
328 {
329   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
330   IS              sis, gis;
331   const PetscInt *isis, *igis;
332   PetscInt        n, *iis, nsis, ngis, rstart, i;
333 
334   PetscFunctionBegin;
335   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
336   PetscCall(MatFindNonzeroRows(a->B, &gis));
337   PetscCall(ISGetSize(gis, &ngis));
338   PetscCall(ISGetSize(sis, &nsis));
339   PetscCall(ISGetIndices(sis, &isis));
340   PetscCall(ISGetIndices(gis, &igis));
341 
342   PetscCall(PetscMalloc1(ngis + nsis, &iis));
343   PetscCall(PetscArraycpy(iis, igis, ngis));
344   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
345   n = ngis + nsis;
346   PetscCall(PetscSortRemoveDupsInt(&n, iis));
347   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
348   for (i = 0; i < n; i++) iis[i] += rstart;
349   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
350 
351   PetscCall(ISRestoreIndices(sis, &isis));
352   PetscCall(ISRestoreIndices(gis, &igis));
353   PetscCall(ISDestroy(&sis));
354   PetscCall(ISDestroy(&gis));
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 /*
359   Local utility routine that creates a mapping from the global column
360 number to the local number in the off-diagonal part of the local
361 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
362 a slightly higher hash table cost; without it, it is not scalable (each processor
363 has an order N integer array) but is fast to access.
364 */
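/*
  Convention used below and by the lookups in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ(): local indices are
  stored shifted by one (local column i is stored as i + 1; with PETSC_USE_CTABLE the key is garray[i] + 1), so a
  lookup result of 0 means "this global column is not present in the off-diagonal part", and callers subtract 1
  from whatever they retrieve.
*/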
365 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
366 {
367   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
368   PetscInt    n   = aij->B->cmap->n, i;
369 
370   PetscFunctionBegin;
371   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
372 #if defined(PETSC_USE_CTABLE)
373   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
374   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
375 #else
376   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
377   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
378 #endif
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
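/*
  The two macros below insert a single (row, col, value) entry into the diagonal block A (MatSetValues_SeqAIJ_A_Private)
  or the off-diagonal block B (MatSetValues_SeqAIJ_B_Private). Each narrows the search window with a binary search,
  finishes with a linear scan of the row, adds to or overwrites the value if the column already exists, and otherwise
  shifts the tail of the row and inserts a new entry, reallocating with MatSeqXAIJReallocateAIJ() when the row is full.
  They rely on the local variables (rp1/ap1, rp2/ap2, nrow1/nrow2, nonew, ...) set up in MatSetValues_MPIAIJ().
*/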
382 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
383   do { \
384     if (col <= lastcol1) low1 = 0; \
385     else high1 = nrow1; \
386     lastcol1 = col; \
387     while (high1 - low1 > 5) { \
388       t = (low1 + high1) / 2; \
389       if (rp1[t] > col) high1 = t; \
390       else low1 = t; \
391     } \
392     for (_i = low1; _i < high1; _i++) { \
393       if (rp1[_i] > col) break; \
394       if (rp1[_i] == col) { \
395         if (addv == ADD_VALUES) { \
396           ap1[_i] += value; \
397           /* Not sure whether PetscLogFlops() will slow down the code */ \
398           (void)PetscLogFlops(1.0); \
399         } else ap1[_i] = value; \
400         goto a_noinsert; \
401       } \
402     } \
403     if (value == 0.0 && ignorezeroentries && row != col) { \
404       low1  = 0; \
405       high1 = nrow1; \
406       goto a_noinsert; \
407     } \
408     if (nonew == 1) { \
409       low1  = 0; \
410       high1 = nrow1; \
411       goto a_noinsert; \
412     } \
413     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
414     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
415     N = nrow1++ - 1; \
416     a->nz++; \
417     high1++; \
418     /* shift up all the later entries in this row */ \
419     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
420     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
421     rp1[_i] = col; \
422     ap1[_i] = value; \
423   a_noinsert:; \
424     ailen[row] = nrow1; \
425   } while (0)
426 
427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
428   do { \
429     if (col <= lastcol2) low2 = 0; \
430     else high2 = nrow2; \
431     lastcol2 = col; \
432     while (high2 - low2 > 5) { \
433       t = (low2 + high2) / 2; \
434       if (rp2[t] > col) high2 = t; \
435       else low2 = t; \
436     } \
437     for (_i = low2; _i < high2; _i++) { \
438       if (rp2[_i] > col) break; \
439       if (rp2[_i] == col) { \
440         if (addv == ADD_VALUES) { \
441           ap2[_i] += value; \
442           (void)PetscLogFlops(1.0); \
443         } else ap2[_i] = value; \
444         goto b_noinsert; \
445       } \
446     } \
447     if (value == 0.0 && ignorezeroentries) { \
448       low2  = 0; \
449       high2 = nrow2; \
450       goto b_noinsert; \
451     } \
452     if (nonew == 1) { \
453       low2  = 0; \
454       high2 = nrow2; \
455       goto b_noinsert; \
456     } \
457     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
458     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
459     N = nrow2++ - 1; \
460     b->nz++; \
461     high2++; \
462     /* shift up all the later entries in this row */ \
463     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
464     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
465     rp2[_i] = col; \
466     ap2[_i] = value; \
467   b_noinsert:; \
468     bilen[row] = nrow2; \
469   } while (0)
470 
471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
472 {
473   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
474   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
475   PetscInt     l, *garray                         = mat->garray, diag;
476   PetscScalar *aa, *ba;
477 
478   PetscFunctionBegin;
479   /* code only works for square matrices A */
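  /* v[] is expected to hold the full locally owned row with entries ordered by increasing global column: first the
     off-diagonal entries to the left of the diagonal block (stored in B), then the diagonal-block entries (stored in A),
     then the remaining off-diagonal entries to the right (also stored in B) */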
480 
481   /* find size of row to the left of the diagonal part */
482   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
483   row = row - diag;
484   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
485     if (garray[b->j[b->i[row] + l]] > diag) break;
486   }
487   if (l) {
488     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
489     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
490     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
491   }
492 
493   /* diagonal part */
494   if (a->i[row + 1] - a->i[row]) {
495     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
496     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
497     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
498   }
499 
500   /* right of diagonal part */
501   if (b->i[row + 1] - b->i[row] - l) {
502     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
503     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
504     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
505   }
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
510 {
511   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
512   PetscScalar value = 0.0;
513   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
514   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
515   PetscBool   roworiented = aij->roworiented;
516 
517   /* Some Variables required in the macro */
518   Mat         A     = aij->A;
519   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
520   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
521   PetscBool   ignorezeroentries = a->ignorezeroentries;
522   Mat         B                 = aij->B;
523   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
524   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
525   MatScalar  *aa, *ba;
526   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
527   PetscInt    nonew;
528   MatScalar  *ap1, *ap2;
529 
530   PetscFunctionBegin;
531   PetscCall(MatSeqAIJGetArray(A, &aa));
532   PetscCall(MatSeqAIJGetArray(B, &ba));
533   for (i = 0; i < m; i++) {
534     if (im[i] < 0) continue;
535     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
536     if (im[i] >= rstart && im[i] < rend) {
537       row      = im[i] - rstart;
538       lastcol1 = -1;
539       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
540       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
541       rmax1    = aimax[row];
542       nrow1    = ailen[row];
543       low1     = 0;
544       high1    = nrow1;
545       lastcol2 = -1;
546       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
547       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
548       rmax2    = bimax[row];
549       nrow2    = bilen[row];
550       low2     = 0;
551       high2    = nrow2;
552 
553       for (j = 0; j < n; j++) {
554         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
555         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
556         if (in[j] >= cstart && in[j] < cend) {
557           col   = in[j] - cstart;
558           nonew = a->nonew;
559           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
560         } else if (in[j] < 0) {
561           continue;
562         } else {
563           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
564           if (mat->was_assembled) {
565             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
566 #if defined(PETSC_USE_CTABLE)
567             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
568             col--;
569 #else
570             col = aij->colmap[in[j]] - 1;
571 #endif
572             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
573               PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
574               col = in[j];
575               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
576               B     = aij->B;
577               b     = (Mat_SeqAIJ *)B->data;
578               bimax = b->imax;
579               bi    = b->i;
580               bilen = b->ilen;
581               bj    = b->j;
582               ba    = b->a;
583               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
584               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
585               rmax2 = bimax[row];
586               nrow2 = bilen[row];
587               low2  = 0;
588               high2 = nrow2;
589               bm    = aij->B->rmap->n;
590               ba    = b->a;
591             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
592               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
593                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
594               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
595             }
596           } else col = in[j];
597           nonew = b->nonew;
598           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
599         }
600       }
601     } else {
602       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
603       if (!aij->donotstash) {
604         mat->assembled = PETSC_FALSE;
605         if (roworiented) {
606           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
607         } else {
608           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
609         }
610       }
611     }
612   }
613   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we don't access them here */
614   PetscCall(MatSeqAIJRestoreArray(B, &ba));
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
617 
618 /*
619     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
620     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
621     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
622 */
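/*
  Small worked example of the splitting performed below (numbers are illustrative only): if the owned column range is
  [cstart, cend) = [4, 8) and row j has global columns {1, 5, 9} in mat_j, then 5 is placed in the diagonal block as
  local column 5 - cstart = 1 (so ailen[j] = 1), while 1 and 9 remain as global columns in the off-diagonal block
  (so bilen[j] = 2).
*/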
623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
624 {
625   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
626   Mat         A      = aij->A; /* diagonal part of the matrix */
627   Mat         B      = aij->B; /* off-diagonal part of the matrix */
628   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
629   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
630   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
631   PetscInt   *ailen = a->ilen, *aj = a->j;
632   PetscInt   *bilen = b->ilen, *bj = b->j;
633   PetscInt    am          = aij->A->rmap->n, j;
634   PetscInt    diag_so_far = 0, dnz;
635   PetscInt    offd_so_far = 0, onz;
636 
637   PetscFunctionBegin;
638   /* Iterate over all rows of the matrix */
639   for (j = 0; j < am; j++) {
640     dnz = onz = 0;
641     /*  Iterate over all non-zero columns of the current row */
642     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
643       /* If column is in the diagonal */
644       if (mat_j[col] >= cstart && mat_j[col] < cend) {
645         aj[diag_so_far++] = mat_j[col] - cstart;
646         dnz++;
647       } else { /* off-diagonal entries */
648         bj[offd_so_far++] = mat_j[col];
649         onz++;
650       }
651     }
652     ailen[j] = dnz;
653     bilen[j] = onz;
654   }
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 /*
659     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
660     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
661     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
662     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
663     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
664 */
665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
666 {
667   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
668   Mat          A    = aij->A; /* diagonal part of the matrix */
669   Mat          B    = aij->B; /* off-diagonal part of the matrix */
670   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
671   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
672   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
673   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
674   PetscInt    *ailen = a->ilen, *aj = a->j;
675   PetscInt    *bilen = b->ilen, *bj = b->j;
676   PetscInt     am          = aij->A->rmap->n, j;
677   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
678   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
679   PetscScalar *aa = a->a, *ba = b->a;
680 
681   PetscFunctionBegin;
682   /* Iterate over all rows of the matrix */
683   for (j = 0; j < am; j++) {
684     dnz_row = onz_row = 0;
685     rowstart_offd     = full_offd_i[j];
686     rowstart_diag     = full_diag_i[j];
687     /*  Iterate over all non-zero columns of the current row */
688     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
689       /* If column is in the diagonal */
690       if (mat_j[col] >= cstart && mat_j[col] < cend) {
691         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
692         aa[rowstart_diag + dnz_row] = mat_a[col];
693         dnz_row++;
694       } else { /* off-diagonal entries */
695         bj[rowstart_offd + onz_row] = mat_j[col];
696         ba[rowstart_offd + onz_row] = mat_a[col];
697         onz_row++;
698       }
699     }
700     ailen[j] = dnz_row;
701     bilen[j] = onz_row;
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
705 
706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
707 {
708   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
709   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
710   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
711 
712   PetscFunctionBegin;
713   for (i = 0; i < m; i++) {
714     if (idxm[i] < 0) continue; /* negative row */
715     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
716     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
717     row = idxm[i] - rstart;
718     for (j = 0; j < n; j++) {
719       if (idxn[j] < 0) continue; /* negative column */
720       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
721       if (idxn[j] >= cstart && idxn[j] < cend) {
722         col = idxn[j] - cstart;
723         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
724       } else {
725         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
726 #if defined(PETSC_USE_CTABLE)
727         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
728         col--;
729 #else
730         col = aij->colmap[idxn[j]] - 1;
731 #endif
732         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
733         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
734       }
735     }
736   }
737   PetscFunctionReturn(PETSC_SUCCESS);
738 }
739 
740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
741 {
742   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
743   PetscInt    nstash, reallocs;
744 
745   PetscFunctionBegin;
746   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
747 
748   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
749   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
750   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
751   PetscFunctionReturn(PETSC_SUCCESS);
752 }
753 
754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
755 {
756   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
757   PetscMPIInt  n;
758   PetscInt     i, j, rstart, ncols, flg;
759   PetscInt    *row, *col;
760   PetscBool    other_disassembled;
761   PetscScalar *val;
762 
763   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
764 
765   PetscFunctionBegin;
766   if (!aij->donotstash && !mat->nooffprocentries) {
767     while (1) {
768       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
769       if (!flg) break;
770 
771       for (i = 0; i < n;) {
772         /* Now identify the consecutive vals belonging to the same row */
773         for (j = i, rstart = row[j]; j < n; j++) {
774           if (row[j] != rstart) break;
775         }
776         if (j < n) ncols = j - i;
777         else ncols = n - i;
778         /* Now assemble all these values with a single function call */
779         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
780         i = j;
781       }
782     }
783     PetscCall(MatStashScatterEnd_Private(&mat->stash));
784   }
785 #if defined(PETSC_HAVE_DEVICE)
786   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
787   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
788   if (mat->boundtocpu) {
789     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
790     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
791   }
792 #endif
793   PetscCall(MatAssemblyBegin(aij->A, mode));
794   PetscCall(MatAssemblyEnd(aij->A, mode));
795 
796   /* determine if any processor has disassembled; if so, we must
797      also disassemble ourselves, in order that we may reassemble. */
798   /*
799      if the nonzero structure of submatrix B cannot change then we know that
800      no processor disassembled, thus we can skip this step
801   */
802   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
803     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
804     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
805       PetscCall(MatDisAssemble_MPIAIJ(mat));
806     }
807   }
808   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
809   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
810 #if defined(PETSC_HAVE_DEVICE)
811   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
812 #endif
813   PetscCall(MatAssemblyBegin(aij->B, mode));
814   PetscCall(MatAssemblyEnd(aij->B, mode));
815 
816   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
817 
818   aij->rowvalues = NULL;
819 
820   PetscCall(VecDestroy(&aij->diag));
821 
822   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
823   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
824     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
825     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
826   }
827 #if defined(PETSC_HAVE_DEVICE)
828   mat->offloadmask = PETSC_OFFLOAD_BOTH;
829 #endif
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
834 {
835   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
836 
837   PetscFunctionBegin;
838   PetscCall(MatZeroEntries(l->A));
839   PetscCall(MatZeroEntries(l->B));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
844 {
845   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
846   PetscInt   *lrows;
847   PetscInt    r, len;
848   PetscBool   cong;
849 
850   PetscFunctionBegin;
851   /* get locally owned rows */
852   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
853   PetscCall(MatHasCongruentLayouts(A, &cong));
854   /* fix right-hand side if needed */
855   if (x && b) {
856     const PetscScalar *xx;
857     PetscScalar       *bb;
858 
859     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
860     PetscCall(VecGetArrayRead(x, &xx));
861     PetscCall(VecGetArray(b, &bb));
862     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
863     PetscCall(VecRestoreArrayRead(x, &xx));
864     PetscCall(VecRestoreArray(b, &bb));
865   }
866 
867   if (diag != 0.0 && cong) {
868     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
869     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
870   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
871     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
872     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
873     PetscInt    nnwA, nnwB;
874     PetscBool   nnzA, nnzB;
875 
876     nnwA = aijA->nonew;
877     nnwB = aijB->nonew;
878     nnzA = aijA->keepnonzeropattern;
879     nnzB = aijB->keepnonzeropattern;
880     if (!nnzA) {
881       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
882       aijA->nonew = 0;
883     }
884     if (!nnzB) {
885       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
886       aijB->nonew = 0;
887     }
888     /* Must zero here before the next loop */
889     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
890     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
891     for (r = 0; r < len; ++r) {
892       const PetscInt row = lrows[r] + A->rmap->rstart;
893       if (row >= A->cmap->N) continue;
894       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
895     }
896     aijA->nonew = nnwA;
897     aijB->nonew = nnwB;
898   } else {
899     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
900     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
901   }
902   PetscCall(PetscFree(lrows));
903   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
904   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
905 
906   /* only change matrix nonzero state if pattern was allowed to be changed */
907   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
908     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
909     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
910   }
911   PetscFunctionReturn(PETSC_SUCCESS);
912 }
913 
914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
915 {
916   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
917   PetscInt           n = A->rmap->n;
918   PetscInt           i, j, r, m, len = 0;
919   PetscInt          *lrows, *owners = A->rmap->range;
920   PetscMPIInt        p = 0;
921   PetscSFNode       *rrows;
922   PetscSF            sf;
923   const PetscScalar *xx;
924   PetscScalar       *bb, *mask, *aij_a;
925   Vec                xmask, lmask;
926   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
927   const PetscInt    *aj, *ii, *ridx;
928   PetscScalar       *aa;
929 
930   PetscFunctionBegin;
931   /* Create SF where leaves are input rows and roots are owned rows */
932   PetscCall(PetscMalloc1(n, &lrows));
933   for (r = 0; r < n; ++r) lrows[r] = -1;
934   PetscCall(PetscMalloc1(N, &rrows));
935   for (r = 0; r < N; ++r) {
936     const PetscInt idx = rows[r];
937     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
938     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
939       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
940     }
941     rrows[r].rank  = p;
942     rrows[r].index = rows[r] - owners[p];
943   }
944   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
945   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
946   /* Collect flags for rows to be zeroed */
947   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
948   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
949   PetscCall(PetscSFDestroy(&sf));
950   /* Compress and put in row numbers */
951   for (r = 0; r < n; ++r)
952     if (lrows[r] >= 0) lrows[len++] = r;
953   /* zero diagonal part of matrix */
954   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
955   /* handle off-diagonal part of matrix */
956   PetscCall(MatCreateVecs(A, &xmask, NULL));
957   PetscCall(VecDuplicate(l->lvec, &lmask));
958   PetscCall(VecGetArray(xmask, &bb));
959   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
960   PetscCall(VecRestoreArray(xmask, &bb));
961   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
962   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
963   PetscCall(VecDestroy(&xmask));
964   if (x && b) { /* this code is buggy when the row and column layouts do not match */
965     PetscBool cong;
966 
967     PetscCall(MatHasCongruentLayouts(A, &cong));
968     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
969     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
970     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
971     PetscCall(VecGetArrayRead(l->lvec, &xx));
972     PetscCall(VecGetArray(b, &bb));
973   }
974   PetscCall(VecGetArray(lmask, &mask));
975   /* remove zeroed rows of off-diagonal matrix */
976   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
977   ii = aij->i;
978   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
979   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
980   if (aij->compressedrow.use) {
981     m    = aij->compressedrow.nrows;
982     ii   = aij->compressedrow.i;
983     ridx = aij->compressedrow.rindex;
984     for (i = 0; i < m; i++) {
985       n  = ii[i + 1] - ii[i];
986       aj = aij->j + ii[i];
987       aa = aij_a + ii[i];
988 
989       for (j = 0; j < n; j++) {
990         if (PetscAbsScalar(mask[*aj])) {
991           if (b) bb[*ridx] -= *aa * xx[*aj];
992           *aa = 0.0;
993         }
994         aa++;
995         aj++;
996       }
997       ridx++;
998     }
999   } else { /* do not use compressed row format */
1000     m = l->B->rmap->n;
1001     for (i = 0; i < m; i++) {
1002       n  = ii[i + 1] - ii[i];
1003       aj = aij->j + ii[i];
1004       aa = aij_a + ii[i];
1005       for (j = 0; j < n; j++) {
1006         if (PetscAbsScalar(mask[*aj])) {
1007           if (b) bb[i] -= *aa * xx[*aj];
1008           *aa = 0.0;
1009         }
1010         aa++;
1011         aj++;
1012       }
1013     }
1014   }
1015   if (x && b) {
1016     PetscCall(VecRestoreArray(b, &bb));
1017     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1018   }
1019   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1020   PetscCall(VecRestoreArray(lmask, &mask));
1021   PetscCall(VecDestroy(&lmask));
1022   PetscCall(PetscFree(lrows));
1023 
1024   /* only change matrix nonzero state if pattern was allowed to be changed */
1025   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1026     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1027     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1028   }
1029   PetscFunctionReturn(PETSC_SUCCESS);
1030 }
1031 
1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1033 {
1034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035   PetscInt    nt;
1036   VecScatter  Mvctx = a->Mvctx;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(VecGetLocalSize(xx, &nt));
1040   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1041   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->A, mult, xx, yy);
1043   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1044   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1045   PetscFunctionReturn(PETSC_SUCCESS);
1046 }
1047 
1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1049 {
1050   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1051 
1052   PetscFunctionBegin;
1053   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1058 {
1059   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1060   VecScatter  Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   /* do nondiagonal part */
1076   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1077   /* do local part */
1078   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1079   /* add partial results together */
1080   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1081   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1086 {
1087   MPI_Comm    comm;
1088   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1089   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1090   IS          Me, Notme;
1091   PetscInt    M, N, first, last, *notme, i;
1092   PetscBool   lf;
1093   PetscMPIInt size;
1094 
1095   PetscFunctionBegin;
1096   /* Easy test: symmetric diagonal block */
1097   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1098   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1099   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1100   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1101   PetscCallMPI(MPI_Comm_size(comm, &size));
1102   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1103 
1104   /* Hard test: off-diagonal block. This takes a call to MatCreateSubMatrices(). */
1105   PetscCall(MatGetSize(Amat, &M, &N));
1106   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1107   PetscCall(PetscMalloc1(N - last + first, &notme));
1108   for (i = 0; i < first; i++) notme[i] = i;
1109   for (i = last; i < M; i++) notme[i - last + first] = i;
1110   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1111   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1112   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1113   Aoff = Aoffs[0];
1114   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1115   Boff = Boffs[0];
1116   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1117   PetscCall(MatDestroyMatrices(1, &Aoffs));
1118   PetscCall(MatDestroyMatrices(1, &Boffs));
1119   PetscCall(ISDestroy(&Me));
1120   PetscCall(ISDestroy(&Notme));
1121   PetscCall(PetscFree(notme));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1126 {
1127   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1128 
1129   PetscFunctionBegin;
1130   /* do nondiagonal part */
1131   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1132   /* do local part */
1133   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1134   /* add partial results together */
1135   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1136   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1137   PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139 
1140 /*
1141   This only works correctly for square matrices where the subblock A->A is the
1142    diagonal block
1143 */
1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1150   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1151   PetscCall(MatGetDiagonal(a->A, v));
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(MatScale(a->A, aa));
1161   PetscCall(MatScale(a->B, aa));
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1166 {
1167   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1168   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1169   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1170   const PetscInt    *garray = aij->garray;
1171   const PetscScalar *aa, *ba;
1172   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1173   PetscInt64         nz, hnz;
1174   PetscInt          *rowlens;
1175   PetscInt          *colidxs;
1176   PetscScalar       *matvals;
1177   PetscMPIInt        rank;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscViewerSetUp(viewer));
1181 
1182   M  = mat->rmap->N;
1183   N  = mat->cmap->N;
1184   m  = mat->rmap->n;
1185   rs = mat->rmap->rstart;
1186   cs = mat->cmap->rstart;
1187   nz = A->nz + B->nz;
1188 
1189   /* write matrix header */
1190   header[0] = MAT_FILE_CLASSID;
1191   header[1] = M;
1192   header[2] = N;
1193   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1194   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1195   if (rank == 0) {
1196     if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX;
1197     else header[3] = (PetscInt)hnz;
1198   }
1199   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1200 
1201   /* fill in and store row lengths */
1202   PetscCall(PetscMalloc1(m, &rowlens));
1203   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1204   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1205   PetscCall(PetscFree(rowlens));
1206 
1207   /* fill in and store column indices */
1208   PetscCall(PetscMalloc1(nz, &colidxs));
1209   for (cnt = 0, i = 0; i < m; i++) {
1210     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1211       if (garray[B->j[jb]] > cs) break;
1212       colidxs[cnt++] = garray[B->j[jb]];
1213     }
1214     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1215     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1216   }
1217   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1218   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1219   PetscCall(PetscFree(colidxs));
1220 
1221   /* fill in and store nonzero values */
1222   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1223   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1224   PetscCall(PetscMalloc1(nz, &matvals));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       matvals[cnt++] = ba[jb];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1231     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1232   }
1233   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1235   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1236   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1237   PetscCall(PetscFree(matvals));
1238 
1239   /* write block size option to the viewer's .info file */
1240   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 #include <petscdraw.h>
1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1246 {
1247   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1248   PetscMPIInt       rank = aij->rank, size = aij->size;
1249   PetscBool         isdraw, iascii, isbinary;
1250   PetscViewer       sviewer;
1251   PetscViewerFormat format;
1252 
1253   PetscFunctionBegin;
1254   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1255   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1256   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1257   if (iascii) {
1258     PetscCall(PetscViewerGetFormat(viewer, &format));
1259     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1260       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1261       PetscCall(PetscMalloc1(size, &nz));
1262       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1263       for (i = 0; i < (PetscInt)size; i++) {
1264         nmax = PetscMax(nmax, nz[i]);
1265         nmin = PetscMin(nmin, nz[i]);
1266         navg += nz[i];
1267       }
1268       PetscCall(PetscFree(nz));
1269       navg = navg / size;
1270       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1271       PetscFunctionReturn(PETSC_SUCCESS);
1272     }
1273     PetscCall(PetscViewerGetFormat(viewer, &format));
1274     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1275       MatInfo   info;
1276       PetscInt *inodes = NULL;
1277 
1278       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1279       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1280       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1281       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1282       if (!inodes) {
1283         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1284                                                      (double)info.memory));
1285       } else {
1286         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1287                                                      (double)info.memory));
1288       }
1289       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1290       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1291       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1292       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1293       PetscCall(PetscViewerFlush(viewer));
1294       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1295       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1296       PetscCall(VecScatterView(aij->Mvctx, viewer));
1297       PetscFunctionReturn(PETSC_SUCCESS);
1298     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1299       PetscInt inodecount, inodelimit, *inodes;
1300       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1301       if (inodes) {
1302         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1303       } else {
1304         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1305       }
1306       PetscFunctionReturn(PETSC_SUCCESS);
1307     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1308       PetscFunctionReturn(PETSC_SUCCESS);
1309     }
1310   } else if (isbinary) {
1311     if (size == 1) {
1312       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1313       PetscCall(MatView(aij->A, viewer));
1314     } else {
1315       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1316     }
1317     PetscFunctionReturn(PETSC_SUCCESS);
1318   } else if (iascii && size == 1) {
1319     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1320     PetscCall(MatView(aij->A, viewer));
1321     PetscFunctionReturn(PETSC_SUCCESS);
1322   } else if (isdraw) {
1323     PetscDraw draw;
1324     PetscBool isnull;
1325     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1326     PetscCall(PetscDrawIsNull(draw, &isnull));
1327     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1328   }
1329 
1330   { /* assemble the entire matrix onto first processor */
1331     Mat A = NULL, Av;
1332     IS  isrow, iscol;
1333 
1334     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1335     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1336     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1337     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1338     /*  The commented code uses MatCreateSubMatrices instead */
1339     /*
1340     Mat *AA, A = NULL, Av;
1341     IS  isrow,iscol;
1342 
1343     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1344     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1345     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1346     if (rank == 0) {
1347        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1348        A    = AA[0];
1349        Av   = AA[0];
1350     }
1351     PetscCall(MatDestroySubMatrices(1,&AA));
1352 */
1353     PetscCall(ISDestroy(&iscol));
1354     PetscCall(ISDestroy(&isrow));
1355     /*
1356        Every process has to participate in the viewing call, since the graphics waits are
1357        synchronized across all processes that share the PetscDraw object
1358     */
1359     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1360     if (rank == 0) {
1361       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1362       PetscCall(MatView_SeqAIJ(Av, sviewer));
1363     }
1364     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1365     PetscCall(MatDestroy(&A));
1366   }
1367   PetscFunctionReturn(PETSC_SUCCESS);
1368 }
1369 
1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1371 {
1372   PetscBool iascii, isdraw, issocket, isbinary;
1373 
1374   PetscFunctionBegin;
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1377   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1378   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1379   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1380   PetscFunctionReturn(PETSC_SUCCESS);
1381 }
1382 
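/*
   MatSOR_MPIAIJ - Only the "local" SOR variants (and Eisenstat) are supported in parallel. SOR_APPLY_UPPER is
   forwarded directly to the diagonal block. For the local sweeps, each outer iteration scatters the current
   solution into the ghost vector lvec, forms the modified right-hand side bb1 = bb - B*lvec (moving the
   off-process coupling to the right-hand side), and then applies the requested sweep of the diagonal block A
   to bb1. Any other variant is reported as unsupported.
*/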
1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1384 {
1385   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1386   Vec         bb1 = NULL;
1387   PetscBool   hasop;
1388 
1389   PetscFunctionBegin;
1390   if (flag == SOR_APPLY_UPPER) {
1391     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1392     PetscFunctionReturn(PETSC_SUCCESS);
1393   }
1394 
1395   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1396 
1397   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1398     if (flag & SOR_ZERO_INITIAL_GUESS) {
1399       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1400       its--;
1401     }
1402 
1403     while (its--) {
1404       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1405       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1406 
1407       /* update rhs: bb1 = bb - B*x */
1408       PetscCall(VecScale(mat->lvec, -1.0));
1409       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1410 
1411       /* local sweep */
1412       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1413     }
1414   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419     while (its--) {
1420       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1421       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422 
1423       /* update rhs: bb1 = bb - B*x */
1424       PetscCall(VecScale(mat->lvec, -1.0));
1425       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1426 
1427       /* local sweep */
1428       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1429     }
1430   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1431     if (flag & SOR_ZERO_INITIAL_GUESS) {
1432       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1433       its--;
1434     }
1435     while (its--) {
1436       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1437       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438 
1439       /* update rhs: bb1 = bb - B*x */
1440       PetscCall(VecScale(mat->lvec, -1.0));
1441       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1442 
1443       /* local sweep */
1444       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1445     }
1446   } else if (flag & SOR_EISENSTAT) {
1447     Vec xx1;
1448 
1449     PetscCall(VecDuplicate(bb, &xx1));
1450     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1451 
1452     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1453     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454     if (!mat->diag) {
1455       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1456       PetscCall(MatGetDiagonal(matin, mat->diag));
1457     }
1458     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1459     if (hasop) {
1460       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1461     } else {
1462       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1463     }
1464     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1465 
1466     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1467 
1468     /* local sweep */
1469     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1470     PetscCall(VecAXPY(xx, 1.0, xx1));
1471     PetscCall(VecDestroy(&xx1));
1472   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1473 
1474   PetscCall(VecDestroy(&bb1));
1475 
1476   matin->factorerrortype = mat->A->factorerrortype;
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479 
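/*
   MatPermute_MPIAIJ - Applies row and column permutations in four steps: (1) invert the permutations with
   PetscSF reductions so each process learns the destination global index of its rows (rdest) and columns
   (cdest); (2) broadcast the destination indices of the ghost columns (gcdest); (3) count the diagonal and
   off-diagonal nonzeros of each permuted row and forward the counts to the destination rows for
   preallocation; (4) insert the values into the new matrix with MatSetValues().
*/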
1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1481 {
1482   Mat             aA, aB, Aperm;
1483   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1484   PetscScalar    *aa, *ba;
1485   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1486   PetscSF         rowsf, sf;
1487   IS              parcolp = NULL;
1488   PetscBool       done;
1489 
1490   PetscFunctionBegin;
1491   PetscCall(MatGetLocalSize(A, &m, &n));
1492   PetscCall(ISGetIndices(rowp, &rwant));
1493   PetscCall(ISGetIndices(colp, &cwant));
1494   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1495 
1496   /* Invert row permutation to find out where my rows should go */
1497   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1498   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1499   PetscCall(PetscSFSetFromOptions(rowsf));
1500   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1501   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1502   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1503 
1504   /* Invert column permutation to find out where my columns should go */
1505   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1506   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1507   PetscCall(PetscSFSetFromOptions(sf));
1508   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1509   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1510   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1511   PetscCall(PetscSFDestroy(&sf));
1512 
1513   PetscCall(ISRestoreIndices(rowp, &rwant));
1514   PetscCall(ISRestoreIndices(colp, &cwant));
1515   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1516 
1517   /* Find out where my gcols should go */
1518   PetscCall(MatGetSize(aB, NULL, &ng));
1519   PetscCall(PetscMalloc1(ng, &gcdest));
1520   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1521   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1522   PetscCall(PetscSFSetFromOptions(sf));
1523   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1524   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1525   PetscCall(PetscSFDestroy(&sf));
1526 
1527   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1528   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1529   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1530   for (i = 0; i < m; i++) {
1531     PetscInt    row = rdest[i];
1532     PetscMPIInt rowner;
1533     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1534     for (j = ai[i]; j < ai[i + 1]; j++) {
1535       PetscInt    col = cdest[aj[j]];
1536       PetscMPIInt cowner;
1537       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1538       if (rowner == cowner) dnnz[i]++;
1539       else onnz[i]++;
1540     }
1541     for (j = bi[i]; j < bi[i + 1]; j++) {
1542       PetscInt    col = gcdest[bj[j]];
1543       PetscMPIInt cowner;
1544       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1545       if (rowner == cowner) dnnz[i]++;
1546       else onnz[i]++;
1547     }
1548   }
1549   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1550   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1551   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1552   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1553   PetscCall(PetscSFDestroy(&rowsf));
1554 
1555   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1556   PetscCall(MatSeqAIJGetArray(aA, &aa));
1557   PetscCall(MatSeqAIJGetArray(aB, &ba));
1558   for (i = 0; i < m; i++) {
1559     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1560     PetscInt  j0, rowlen;
1561     rowlen = ai[i + 1] - ai[i];
1562     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than the scratch arrays of length m, so insert the values in batches */
1563       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1564       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1565     }
1566     rowlen = bi[i + 1] - bi[i];
1567     for (j0 = j = 0; j < rowlen; j0 = j) {
1568       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1569       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1570     }
1571   }
1572   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1573   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1574   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1575   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1576   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1577   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1578   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1579   PetscCall(PetscFree3(work, rdest, cdest));
1580   PetscCall(PetscFree(gcdest));
1581   if (parcolp) PetscCall(ISDestroy(&colp));
1582   *B = Aperm;
1583   PetscFunctionReturn(PETSC_SUCCESS);
1584 }
1585 
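/*
   MatGetGhosts_MPIAIJ - The ghost (off-process) columns of an MPIAIJ matrix are exactly the global indices
   stored in garray, in the order used by the column-compressed off-diagonal block B.
*/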
1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1587 {
1588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1589 
1590   PetscFunctionBegin;
1591   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1592   if (ghosts) *ghosts = aij->garray;
1593   PetscFunctionReturn(PETSC_SUCCESS);
1594 }
1595 
1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1597 {
1598   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1599   Mat            A = mat->A, B = mat->B;
1600   PetscLogDouble isend[5], irecv[5];
1601 
1602   PetscFunctionBegin;
1603   info->block_size = 1.0;
1604   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1605 
1606   isend[0] = info->nz_used;
1607   isend[1] = info->nz_allocated;
1608   isend[2] = info->nz_unneeded;
1609   isend[3] = info->memory;
1610   isend[4] = info->mallocs;
1611 
1612   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1613 
1614   isend[0] += info->nz_used;
1615   isend[1] += info->nz_allocated;
1616   isend[2] += info->nz_unneeded;
1617   isend[3] += info->memory;
1618   isend[4] += info->mallocs;
1619   if (flag == MAT_LOCAL) {
1620     info->nz_used      = isend[0];
1621     info->nz_allocated = isend[1];
1622     info->nz_unneeded  = isend[2];
1623     info->memory       = isend[3];
1624     info->mallocs      = isend[4];
1625   } else if (flag == MAT_GLOBAL_MAX) {
1626     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1627 
1628     info->nz_used      = irecv[0];
1629     info->nz_allocated = irecv[1];
1630     info->nz_unneeded  = irecv[2];
1631     info->memory       = irecv[3];
1632     info->mallocs      = irecv[4];
1633   } else if (flag == MAT_GLOBAL_SUM) {
1634     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1635 
1636     info->nz_used      = irecv[0];
1637     info->nz_allocated = irecv[1];
1638     info->nz_unneeded  = irecv[2];
1639     info->memory       = irecv[3];
1640     info->mallocs      = irecv[4];
1641   }
1642   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1643   info->fill_ratio_needed = 0;
1644   info->factor_mallocs    = 0;
1645   PetscFunctionReturn(PETSC_SUCCESS);
1646 }
1647 
1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1649 {
1650   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1651 
1652   PetscFunctionBegin;
1653   switch (op) {
1654   case MAT_NEW_NONZERO_LOCATIONS:
1655   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1656   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1657   case MAT_KEEP_NONZERO_PATTERN:
1658   case MAT_NEW_NONZERO_LOCATION_ERR:
1659   case MAT_USE_INODES:
1660   case MAT_IGNORE_ZERO_ENTRIES:
1661   case MAT_FORM_EXPLICIT_TRANSPOSE:
1662     MatCheckPreallocated(A, 1);
1663     PetscCall(MatSetOption(a->A, op, flg));
1664     PetscCall(MatSetOption(a->B, op, flg));
1665     break;
1666   case MAT_ROW_ORIENTED:
1667     MatCheckPreallocated(A, 1);
1668     a->roworiented = flg;
1669 
1670     PetscCall(MatSetOption(a->A, op, flg));
1671     PetscCall(MatSetOption(a->B, op, flg));
1672     break;
1673   case MAT_FORCE_DIAGONAL_ENTRIES:
1674   case MAT_SORTED_FULL:
1675     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1676     break;
1677   case MAT_IGNORE_OFF_PROC_ENTRIES:
1678     a->donotstash = flg;
1679     break;
1680   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1681   case MAT_SPD:
1682   case MAT_SYMMETRIC:
1683   case MAT_STRUCTURALLY_SYMMETRIC:
1684   case MAT_HERMITIAN:
1685   case MAT_SYMMETRY_ETERNAL:
1686   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1687   case MAT_SPD_ETERNAL:
1688     /* if the diagonal block is square it inherits some of the properties above */
1689     break;
1690   case MAT_SUBMAT_SINGLEIS:
1691     A->submat_singleis = flg;
1692     break;
1693   case MAT_STRUCTURE_ONLY:
1694     /* The option is handled directly by MatSetOption() */
1695     break;
1696   default:
1697     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1698   }
1699   PetscFunctionReturn(PETSC_SUCCESS);
1700 }
1701 
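/*
   MatGetRow_MPIAIJ - Returns a locally owned row in global column order by merging the row of the diagonal
   block A with the row of the off-diagonal block B: B entries whose global column (garray[]) is below the
   diagonal block come first, then the A entries shifted by cstart, then the remaining B entries.
*/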
1702 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1703 {
1704   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1705   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1706   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1707   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1708   PetscInt    *cmap, *idx_p;
1709 
1710   PetscFunctionBegin;
1711   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1712   mat->getrowactive = PETSC_TRUE;
1713 
1714   if (!mat->rowvalues && (idx || v)) {
1715     /*
1716         allocate enough space to hold information from the longest row.
1717     */
1718     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1719     PetscInt    max = 1, tmp;
1720     for (i = 0; i < matin->rmap->n; i++) {
1721       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1722       if (max < tmp) max = tmp;
1723     }
1724     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1725   }
1726 
1727   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1728   lrow = row - rstart;
1729 
1730   pvA = &vworkA;
1731   pcA = &cworkA;
1732   pvB = &vworkB;
1733   pcB = &cworkB;
1734   if (!v) {
1735     pvA = NULL;
1736     pvB = NULL;
1737   }
1738   if (!idx) {
1739     pcA = NULL;
1740     if (!v) pcB = NULL;
1741   }
1742   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1743   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1744   nztot = nzA + nzB;
1745 
1746   cmap = mat->garray;
1747   if (v || idx) {
1748     if (nztot) {
1749       /* Sort by increasing column numbers, assuming A and B already sorted */
1750       PetscInt imark = -1;
1751       if (v) {
1752         *v = v_p = mat->rowvalues;
1753         for (i = 0; i < nzB; i++) {
1754           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1755           else break;
1756         }
1757         imark = i;
1758         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1759         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1760       }
1761       if (idx) {
1762         *idx = idx_p = mat->rowindices;
1763         if (imark > -1) {
1764           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1765         } else {
1766           for (i = 0; i < nzB; i++) {
1767             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1768             else break;
1769           }
1770           imark = i;
1771         }
1772         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1773         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1774       }
1775     } else {
1776       if (idx) *idx = NULL;
1777       if (v) *v = NULL;
1778     }
1779   }
1780   *nz = nztot;
1781   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1782   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1783   PetscFunctionReturn(PETSC_SUCCESS);
1784 }
1785 
1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1787 {
1788   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1789 
1790   PetscFunctionBegin;
1791   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1792   aij->getrowactive = PETSC_FALSE;
1793   PetscFunctionReturn(PETSC_SUCCESS);
1794 }
1795 
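/*
   MatNorm_MPIAIJ - On a single process the sequential norm of the diagonal block is used. Otherwise
   NORM_FROBENIUS sums |a_ij|^2 over both local blocks and takes the square root of the global sum; NORM_1
   accumulates per-column absolute sums in work arrays of length cmap->N and reduces them before taking the
   maximum; NORM_INFINITY reduces the maximum local row sum with MPI_MAX. NORM_2 is not supported.
*/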
1796 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1797 {
1798   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1799   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1800   PetscInt         i, j, cstart = mat->cmap->rstart;
1801   PetscReal        sum = 0.0;
1802   const MatScalar *v, *amata, *bmata;
1803   PetscMPIInt      iN;
1804 
1805   PetscFunctionBegin;
1806   if (aij->size == 1) {
1807     PetscCall(MatNorm(aij->A, type, norm));
1808   } else {
1809     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1810     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1811     if (type == NORM_FROBENIUS) {
1812       v = amata;
1813       for (i = 0; i < amat->nz; i++) {
1814         sum += PetscRealPart(PetscConj(*v) * (*v));
1815         v++;
1816       }
1817       v = bmata;
1818       for (i = 0; i < bmat->nz; i++) {
1819         sum += PetscRealPart(PetscConj(*v) * (*v));
1820         v++;
1821       }
1822       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1823       *norm = PetscSqrtReal(*norm);
1824       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1825     } else if (type == NORM_1) { /* max column norm */
1826       PetscReal *tmp, *tmp2;
1827       PetscInt  *jj, *garray = aij->garray;
1828       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1829       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1830       *norm = 0.0;
1831       v     = amata;
1832       jj    = amat->j;
1833       for (j = 0; j < amat->nz; j++) {
1834         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1835         v++;
1836       }
1837       v  = bmata;
1838       jj = bmat->j;
1839       for (j = 0; j < bmat->nz; j++) {
1840         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1841         v++;
1842       }
1843       PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
1844       PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1845       for (j = 0; j < mat->cmap->N; j++) {
1846         if (tmp2[j] > *norm) *norm = tmp2[j];
1847       }
1848       PetscCall(PetscFree(tmp));
1849       PetscCall(PetscFree(tmp2));
1850       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1851     } else if (type == NORM_INFINITY) { /* max row norm */
1852       PetscReal ntemp = 0.0;
1853       for (j = 0; j < aij->A->rmap->n; j++) {
1854         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1855         sum = 0.0;
1856         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1857           sum += PetscAbsScalar(*v);
1858           v++;
1859         }
1860         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1861         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1862           sum += PetscAbsScalar(*v);
1863           v++;
1864         }
1865         if (sum > ntemp) ntemp = sum;
1866       }
1867       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1868       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1869     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1870     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1871     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1872   }
1873   PetscFunctionReturn(PETSC_SUCCESS);
1874 }
1875 
1876 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1877 {
1878   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1879   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1880   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1881   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1882   Mat              B, A_diag, *B_diag;
1883   const MatScalar *pbv, *bv;
1884 
1885   PetscFunctionBegin;
1886   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1887   ma = A->rmap->n;
1888   na = A->cmap->n;
1889   mb = a->B->rmap->n;
1890   nb = a->B->cmap->n;
1891   ai = Aloc->i;
1892   aj = Aloc->j;
1893   bi = Bloc->i;
1894   bj = Bloc->j;
1895   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1896     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1897     PetscSFNode         *oloc;
1898     PETSC_UNUSED PetscSF sf;
1899 
1900     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1901     /* compute d_nnz for preallocation */
1902     PetscCall(PetscArrayzero(d_nnz, na));
1903     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1904     /* compute local off-diagonal contributions */
1905     PetscCall(PetscArrayzero(g_nnz, nb));
1906     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1907     /* map those to global */
1908     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1909     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1910     PetscCall(PetscSFSetFromOptions(sf));
1911     PetscCall(PetscArrayzero(o_nnz, na));
1912     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1913     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1914     PetscCall(PetscSFDestroy(&sf));
1915 
1916     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1917     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1918     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1919     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1920     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1921     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1922   } else {
1923     B = *matout;
1924     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1925   }
1926 
1927   b           = (Mat_MPIAIJ *)B->data;
1928   A_diag      = a->A;
1929   B_diag      = &b->A;
1930   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1931   A_diag_ncol = A_diag->cmap->N;
1932   B_diag_ilen = sub_B_diag->ilen;
1933   B_diag_i    = sub_B_diag->i;
1934 
1935   /* Set ilen for diagonal of B */
1936   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1937 
1938   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1939   very quickly (=without using MatSetValues), because all writes are local. */
1940   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1941   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1942 
1943   /* copy over the B part */
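  /* Each local row i of B becomes (part of) global column rstart+i of the transpose: the global columns
     garray[bj[]] of that row are the destination row indices, so each row is inserted as a single column
     with MatSetValues(). */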
1944   PetscCall(PetscMalloc1(bi[mb], &cols));
1945   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1946   pbv = bv;
1947   row = A->rmap->rstart;
1948   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1949   cols_tmp = cols;
1950   for (i = 0; i < mb; i++) {
1951     ncol = bi[i + 1] - bi[i];
1952     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1953     row++;
1954     if (pbv) pbv += ncol;
1955     if (cols_tmp) cols_tmp += ncol;
1956   }
1957   PetscCall(PetscFree(cols));
1958   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1959 
1960   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1961   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1962   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1963     *matout = B;
1964   } else {
1965     PetscCall(MatHeaderMerge(A, &B));
1966   }
1967   PetscFunctionReturn(PETSC_SUCCESS);
1968 }
1969 
1970 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1971 {
1972   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1973   Mat         a = aij->A, b = aij->B;
1974   PetscInt    s1, s2, s3;
1975 
1976   PetscFunctionBegin;
1977   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1978   if (rr) {
1979     PetscCall(VecGetLocalSize(rr, &s1));
1980     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1981     /* Overlap communication with computation. */
1982     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1983   }
1984   if (ll) {
1985     PetscCall(VecGetLocalSize(ll, &s1));
1986     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1987     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1988   }
1989   /* scale the diagonal block */
1990   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1991 
1992   if (rr) {
1993     /* Do a scatter end and then right scale the off-diagonal block */
1994     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1995     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1996   }
1997   PetscFunctionReturn(PETSC_SUCCESS);
1998 }
1999 
2000 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2001 {
2002   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2003 
2004   PetscFunctionBegin;
2005   PetscCall(MatSetUnfactored(a->A));
2006   PetscFunctionReturn(PETSC_SUCCESS);
2007 }
2008 
2009 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2010 {
2011   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2012   Mat         a, b, c, d;
2013   PetscBool   flg;
2014 
2015   PetscFunctionBegin;
2016   a = matA->A;
2017   b = matA->B;
2018   c = matB->A;
2019   d = matB->B;
2020 
2021   PetscCall(MatEqual(a, c, &flg));
2022   if (flg) PetscCall(MatEqual(b, d, &flg));
2023   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2024   PetscFunctionReturn(PETSC_SUCCESS);
2025 }
2026 
2027 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2028 {
2029   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2030   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2031 
2032   PetscFunctionBegin;
2033   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2034   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2035     /* because of the column compression in the off-processor part of the matrix a->B,
2036        the number of columns in a->B and b->B may be different, hence we cannot call
2037        the MatCopy() directly on the two parts. If need be, we can provide a more
2038        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2039        then copying the submatrices */
2040     PetscCall(MatCopy_Basic(A, B, str));
2041   } else {
2042     PetscCall(MatCopy(a->A, b->A, str));
2043     PetscCall(MatCopy(a->B, b->B, str));
2044   }
2045   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2046   PetscFunctionReturn(PETSC_SUCCESS);
2047 }
2048 
2049 /*
2050    Computes the number of nonzeros per row needed for preallocation when X and Y
2051    have different nonzero structure.
2052 */
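/*
   For example (hypothetical data): if a row of X has global columns {1,4,7} and the same row of Y has
   {0,4,9}, the union is {0,1,4,7,9} and nnz[i] = 5; the loop below merges the two sorted column lists and
   counts shared columns only once.
*/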
2053 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2054 {
2055   PetscInt i, j, k, nzx, nzy;
2056 
2057   PetscFunctionBegin;
2058   /* Set the number of nonzeros in the new matrix */
2059   for (i = 0; i < m; i++) {
2060     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2061     nzx    = xi[i + 1] - xi[i];
2062     nzy    = yi[i + 1] - yi[i];
2063     nnz[i] = 0;
2064     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2065       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2066       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2067       nnz[i]++;
2068     }
2069     for (; k < nzy; k++) nnz[i]++;
2070   }
2071   PetscFunctionReturn(PETSC_SUCCESS);
2072 }
2073 
2074 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2075 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2076 {
2077   PetscInt    m = Y->rmap->N;
2078   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2079   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2080 
2081   PetscFunctionBegin;
2082   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2083   PetscFunctionReturn(PETSC_SUCCESS);
2084 }
2085 
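/*
   MatAXPY_MPIAIJ - With SAME_NONZERO_PATTERN the AXPY is applied blockwise to the diagonal and off-diagonal
   parts; with SUBSET_NONZERO_PATTERN the generic MatAXPY_Basic() is used; otherwise a new matrix with the
   merged nonzero pattern is preallocated, filled with MatAXPY_BasicWithPreallocation(), and swapped into Y
   with MatHeaderMerge().
*/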
2086 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2087 {
2088   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2089 
2090   PetscFunctionBegin;
2091   if (str == SAME_NONZERO_PATTERN) {
2092     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2093     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2094   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2095     PetscCall(MatAXPY_Basic(Y, a, X, str));
2096   } else {
2097     Mat       B;
2098     PetscInt *nnz_d, *nnz_o;
2099 
2100     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2101     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2102     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2103     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2104     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2105     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2106     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2107     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2108     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2109     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2110     PetscCall(MatHeaderMerge(Y, &B));
2111     PetscCall(PetscFree(nnz_d));
2112     PetscCall(PetscFree(nnz_o));
2113   }
2114   PetscFunctionReturn(PETSC_SUCCESS);
2115 }
2116 
2117 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2118 
2119 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2120 {
2121   PetscFunctionBegin;
2122   if (PetscDefined(USE_COMPLEX)) {
2123     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2124 
2125     PetscCall(MatConjugate_SeqAIJ(aij->A));
2126     PetscCall(MatConjugate_SeqAIJ(aij->B));
2127   }
2128   PetscFunctionReturn(PETSC_SUCCESS);
2129 }
2130 
2131 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2132 {
2133   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2134 
2135   PetscFunctionBegin;
2136   PetscCall(MatRealPart(a->A));
2137   PetscCall(MatRealPart(a->B));
2138   PetscFunctionReturn(PETSC_SUCCESS);
2139 }
2140 
2141 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2144 
2145   PetscFunctionBegin;
2146   PetscCall(MatImaginaryPart(a->A));
2147   PetscCall(MatImaginaryPart(a->B));
2148   PetscFunctionReturn(PETSC_SUCCESS);
2149 }
2150 
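/*
   MatGetRowMaxAbs_MPIAIJ - Computes the per-row maximum absolute value of the diagonal and off-diagonal
   blocks separately and keeps the larger of the two; when the two are equal the smaller global column index
   is reported.
*/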
2151 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2152 {
2153   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2154   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2155   PetscScalar       *va, *vv;
2156   Vec                vB, vA;
2157   const PetscScalar *vb;
2158 
2159   PetscFunctionBegin;
2160   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2161   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2162 
2163   PetscCall(VecGetArrayWrite(vA, &va));
2164   if (idx) {
2165     for (i = 0; i < m; i++) {
2166       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2167     }
2168   }
2169 
2170   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2171   PetscCall(PetscMalloc1(m, &idxb));
2172   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2173 
2174   PetscCall(VecGetArrayWrite(v, &vv));
2175   PetscCall(VecGetArrayRead(vB, &vb));
2176   for (i = 0; i < m; i++) {
2177     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2178       vv[i] = vb[i];
2179       if (idx) idx[i] = a->garray[idxb[i]];
2180     } else {
2181       vv[i] = va[i];
2182       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2183     }
2184   }
2185   PetscCall(VecRestoreArrayWrite(v, &vv));
2186   PetscCall(VecRestoreArrayWrite(vA, &va));
2187   PetscCall(VecRestoreArrayRead(vB, &vb));
2188   PetscCall(PetscFree(idxb));
2189   PetscCall(VecDestroy(&vA));
2190   PetscCall(VecDestroy(&vB));
2191   PetscFunctionReturn(PETSC_SUCCESS);
2192 }
2193 
2194 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2195 {
2196   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2197   Vec         vB, vA;
2198 
2199   PetscFunctionBegin;
2200   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2201   PetscCall(MatGetRowSumAbs(a->A, vA));
2202   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2203   PetscCall(MatGetRowSumAbs(a->B, vB));
2204   PetscCall(VecAXPY(vA, 1.0, vB));
2205   PetscCall(VecDestroy(&vB));
2206   PetscCall(VecCopy(vA, v));
2207   PetscCall(VecDestroy(&vA));
2208   PetscFunctionReturn(PETSC_SUCCESS);
2209 }
2210 
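/*
   MatGetRowMinAbs_MPIAIJ - The off-diagonal block B is column-compressed, so columns of the full row that
   are not stored in B are implicit zeros; the first such implicit zero (if any) is located and competes with
   the stored entries of B and with the diagonal block result. MatGetRowMin_MPIAIJ() and MatGetRowMax_MPIAIJ()
   below follow the same structure.
*/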
2211 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2212 {
2213   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2214   PetscInt           m = A->rmap->n, n = A->cmap->n;
2215   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2216   PetscInt          *cmap = mat->garray;
2217   PetscInt          *diagIdx, *offdiagIdx;
2218   Vec                diagV, offdiagV;
2219   PetscScalar       *a, *diagA, *offdiagA;
2220   const PetscScalar *ba, *bav;
2221   PetscInt           r, j, col, ncols, *bi, *bj;
2222   Mat                B = mat->B;
2223   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2224 
2225   PetscFunctionBegin;
2226   /* When this process's diagonal block spans all columns, the off-diagonal block is empty and the result comes from A alone */
2227   if (A->cmap->N == n) {
2228     PetscCall(VecGetArrayWrite(v, &diagA));
2229     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2230     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2231     PetscCall(VecDestroy(&diagV));
2232     PetscCall(VecRestoreArrayWrite(v, &diagA));
2233     PetscFunctionReturn(PETSC_SUCCESS);
2234   } else if (n == 0) {
2235     if (m) {
2236       PetscCall(VecGetArrayWrite(v, &a));
2237       for (r = 0; r < m; r++) {
2238         a[r] = 0.0;
2239         if (idx) idx[r] = -1;
2240       }
2241       PetscCall(VecRestoreArrayWrite(v, &a));
2242     }
2243     PetscFunctionReturn(PETSC_SUCCESS);
2244   }
2245 
2246   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2247   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2248   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2249   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2250 
2251   /* Get offdiagIdx[] for implicit 0.0 */
2252   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2253   ba = bav;
2254   bi = b->i;
2255   bj = b->j;
2256   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2257   for (r = 0; r < m; r++) {
2258     ncols = bi[r + 1] - bi[r];
2259     if (ncols == A->cmap->N - n) { /* Brow is dense */
2260       offdiagA[r]   = *ba;
2261       offdiagIdx[r] = cmap[0];
2262     } else { /* Brow is sparse, so there is an implicit zero and we already KNOW the minimum absolute value is 0.0 */
2263       offdiagA[r] = 0.0;
2264 
2265       /* Find first hole in the cmap */
2266       for (j = 0; j < ncols; j++) {
2267         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2268         if (col > j && j < cstart) {
2269           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2270           break;
2271         } else if (col > j + n && j >= cstart) {
2272           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2273           break;
2274         }
2275       }
2276       if (j == ncols && ncols < A->cmap->N - n) {
2277         /* a hole is outside compressed Bcols */
2278         if (ncols == 0) {
2279           if (cstart) {
2280             offdiagIdx[r] = 0;
2281           } else offdiagIdx[r] = cend;
2282         } else { /* ncols > 0 */
2283           offdiagIdx[r] = cmap[ncols - 1] + 1;
2284           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2285         }
2286       }
2287     }
2288 
2289     for (j = 0; j < ncols; j++) {
2290       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2291         offdiagA[r]   = *ba;
2292         offdiagIdx[r] = cmap[*bj];
2293       }
2294       ba++;
2295       bj++;
2296     }
2297   }
2298 
2299   PetscCall(VecGetArrayWrite(v, &a));
2300   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2301   for (r = 0; r < m; ++r) {
2302     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2303       a[r] = diagA[r];
2304       if (idx) idx[r] = cstart + diagIdx[r];
2305     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2306       a[r] = diagA[r];
2307       if (idx) {
2308         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2309           idx[r] = cstart + diagIdx[r];
2310         } else idx[r] = offdiagIdx[r];
2311       }
2312     } else {
2313       a[r] = offdiagA[r];
2314       if (idx) idx[r] = offdiagIdx[r];
2315     }
2316   }
2317   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2318   PetscCall(VecRestoreArrayWrite(v, &a));
2319   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2320   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2321   PetscCall(VecDestroy(&diagV));
2322   PetscCall(VecDestroy(&offdiagV));
2323   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2324   PetscFunctionReturn(PETSC_SUCCESS);
2325 }
2326 
2327 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2328 {
2329   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2330   PetscInt           m = A->rmap->n, n = A->cmap->n;
2331   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2332   PetscInt          *cmap = mat->garray;
2333   PetscInt          *diagIdx, *offdiagIdx;
2334   Vec                diagV, offdiagV;
2335   PetscScalar       *a, *diagA, *offdiagA;
2336   const PetscScalar *ba, *bav;
2337   PetscInt           r, j, col, ncols, *bi, *bj;
2338   Mat                B = mat->B;
2339   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2340 
2341   PetscFunctionBegin;
2342   /* When this process's diagonal block spans all columns, the off-diagonal block is empty and the result comes from A alone */
2343   if (A->cmap->N == n) {
2344     PetscCall(VecGetArrayWrite(v, &diagA));
2345     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2346     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2347     PetscCall(VecDestroy(&diagV));
2348     PetscCall(VecRestoreArrayWrite(v, &diagA));
2349     PetscFunctionReturn(PETSC_SUCCESS);
2350   } else if (n == 0) {
2351     if (m) {
2352       PetscCall(VecGetArrayWrite(v, &a));
2353       for (r = 0; r < m; r++) {
2354         a[r] = PETSC_MAX_REAL;
2355         if (idx) idx[r] = -1;
2356       }
2357       PetscCall(VecRestoreArrayWrite(v, &a));
2358     }
2359     PetscFunctionReturn(PETSC_SUCCESS);
2360   }
2361 
2362   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2363   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2364   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2365   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2366 
2367   /* Get offdiagIdx[] for implicit 0.0 */
2368   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2369   ba = bav;
2370   bi = b->i;
2371   bj = b->j;
2372   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2373   for (r = 0; r < m; r++) {
2374     ncols = bi[r + 1] - bi[r];
2375     if (ncols == A->cmap->N - n) { /* Brow is dense */
2376       offdiagA[r]   = *ba;
2377       offdiagIdx[r] = cmap[0];
2378     } else { /* Brow is sparse, so there is an implicit zero and we already KNOW the minimum is 0.0 or lower */
2379       offdiagA[r] = 0.0;
2380 
2381       /* Find first hole in the cmap */
2382       for (j = 0; j < ncols; j++) {
2383         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2384         if (col > j && j < cstart) {
2385           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2386           break;
2387         } else if (col > j + n && j >= cstart) {
2388           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2389           break;
2390         }
2391       }
2392       if (j == ncols && ncols < A->cmap->N - n) {
2393         /* a hole is outside compressed Bcols */
2394         if (ncols == 0) {
2395           if (cstart) {
2396             offdiagIdx[r] = 0;
2397           } else offdiagIdx[r] = cend;
2398         } else { /* ncols > 0 */
2399           offdiagIdx[r] = cmap[ncols - 1] + 1;
2400           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2401         }
2402       }
2403     }
2404 
2405     for (j = 0; j < ncols; j++) {
2406       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2407         offdiagA[r]   = *ba;
2408         offdiagIdx[r] = cmap[*bj];
2409       }
2410       ba++;
2411       bj++;
2412     }
2413   }
2414 
2415   PetscCall(VecGetArrayWrite(v, &a));
2416   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2417   for (r = 0; r < m; ++r) {
2418     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2419       a[r] = diagA[r];
2420       if (idx) idx[r] = cstart + diagIdx[r];
2421     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2422       a[r] = diagA[r];
2423       if (idx) {
2424         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2425           idx[r] = cstart + diagIdx[r];
2426         } else idx[r] = offdiagIdx[r];
2427       }
2428     } else {
2429       a[r] = offdiagA[r];
2430       if (idx) idx[r] = offdiagIdx[r];
2431     }
2432   }
2433   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2434   PetscCall(VecRestoreArrayWrite(v, &a));
2435   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2436   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2437   PetscCall(VecDestroy(&diagV));
2438   PetscCall(VecDestroy(&offdiagV));
2439   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2440   PetscFunctionReturn(PETSC_SUCCESS);
2441 }
2442 
2443 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2444 {
2445   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2446   PetscInt           m = A->rmap->n, n = A->cmap->n;
2447   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2448   PetscInt          *cmap = mat->garray;
2449   PetscInt          *diagIdx, *offdiagIdx;
2450   Vec                diagV, offdiagV;
2451   PetscScalar       *a, *diagA, *offdiagA;
2452   const PetscScalar *ba, *bav;
2453   PetscInt           r, j, col, ncols, *bi, *bj;
2454   Mat                B = mat->B;
2455   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2456 
2457   PetscFunctionBegin;
2458   /* When this process's diagonal block spans all columns, the off-diagonal block is empty and the result comes from A alone */
2459   if (A->cmap->N == n) {
2460     PetscCall(VecGetArrayWrite(v, &diagA));
2461     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2462     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2463     PetscCall(VecDestroy(&diagV));
2464     PetscCall(VecRestoreArrayWrite(v, &diagA));
2465     PetscFunctionReturn(PETSC_SUCCESS);
2466   } else if (n == 0) {
2467     if (m) {
2468       PetscCall(VecGetArrayWrite(v, &a));
2469       for (r = 0; r < m; r++) {
2470         a[r] = PETSC_MIN_REAL;
2471         if (idx) idx[r] = -1;
2472       }
2473       PetscCall(VecRestoreArrayWrite(v, &a));
2474     }
2475     PetscFunctionReturn(PETSC_SUCCESS);
2476   }
2477 
2478   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2479   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2480   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2481   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2482 
2483   /* Get offdiagIdx[] for implicit 0.0 */
2484   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2485   ba = bav;
2486   bi = b->i;
2487   bj = b->j;
2488   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2489   for (r = 0; r < m; r++) {
2490     ncols = bi[r + 1] - bi[r];
2491     if (ncols == A->cmap->N - n) { /* Brow is dense */
2492       offdiagA[r]   = *ba;
2493       offdiagIdx[r] = cmap[0];
2494     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2495       offdiagA[r] = 0.0;
2496 
2497       /* Find first hole in the cmap */
2498       for (j = 0; j < ncols; j++) {
2499         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2500         if (col > j && j < cstart) {
2501           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2502           break;
2503         } else if (col > j + n && j >= cstart) {
2504           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2505           break;
2506         }
2507       }
2508       if (j == ncols && ncols < A->cmap->N - n) {
2509         /* a hole is outside compressed Bcols */
2510         if (ncols == 0) {
2511           if (cstart) {
2512             offdiagIdx[r] = 0;
2513           } else offdiagIdx[r] = cend;
2514         } else { /* ncols > 0 */
2515           offdiagIdx[r] = cmap[ncols - 1] + 1;
2516           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2517         }
2518       }
2519     }
2520 
2521     for (j = 0; j < ncols; j++) {
2522       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2523         offdiagA[r]   = *ba;
2524         offdiagIdx[r] = cmap[*bj];
2525       }
2526       ba++;
2527       bj++;
2528     }
2529   }
2530 
2531   PetscCall(VecGetArrayWrite(v, &a));
2532   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2533   for (r = 0; r < m; ++r) {
2534     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2535       a[r] = diagA[r];
2536       if (idx) idx[r] = cstart + diagIdx[r];
2537     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2538       a[r] = diagA[r];
2539       if (idx) {
2540         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2541           idx[r] = cstart + diagIdx[r];
2542         } else idx[r] = offdiagIdx[r];
2543       }
2544     } else {
2545       a[r] = offdiagA[r];
2546       if (idx) idx[r] = offdiagIdx[r];
2547     }
2548   }
2549   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2550   PetscCall(VecRestoreArrayWrite(v, &a));
2551   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2552   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2553   PetscCall(VecDestroy(&diagV));
2554   PetscCall(VecDestroy(&offdiagV));
2555   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2556   PetscFunctionReturn(PETSC_SUCCESS);
2557 }
2558 
2559 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2560 {
2561   Mat *dummy;
2562 
2563   PetscFunctionBegin;
2564   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2565   *newmat = *dummy;
2566   PetscCall(PetscFree(dummy));
2567   PetscFunctionReturn(PETSC_SUCCESS);
2568 }
2569 
2570 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2571 {
2572   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2573 
2574   PetscFunctionBegin;
2575   PetscCall(MatInvertBlockDiagonal(a->A, values));
2576   A->factorerrortype = a->A->factorerrortype;
2577   PetscFunctionReturn(PETSC_SUCCESS);
2578 }
2579 
2580 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2581 {
2582   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2583 
2584   PetscFunctionBegin;
2585   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2586   PetscCall(MatSetRandom(aij->A, rctx));
2587   if (x->assembled) {
2588     PetscCall(MatSetRandom(aij->B, rctx));
2589   } else {
2590     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2591   }
2592   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2593   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2594   PetscFunctionReturn(PETSC_SUCCESS);
2595 }
2596 
2597 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2598 {
2599   PetscFunctionBegin;
2600   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2601   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2602   PetscFunctionReturn(PETSC_SUCCESS);
2603 }
2604 
2605 /*@
2606   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2607 
2608   Not Collective
2609 
2610   Input Parameter:
2611 . A - the matrix
2612 
2613   Output Parameter:
2614 . nz - the number of nonzeros
2615 
2616   Level: advanced
2617 
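  Example Usage:
  A minimal sketch (it assumes `A` is an already assembled `MATMPIAIJ` matrix):
.vb
  PetscCount nz;

  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
.ve
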
2618 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2619 @*/
2620 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2621 {
2622   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2623   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2624   PetscBool   isaij;
2625 
2626   PetscFunctionBegin;
2627   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2628   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2629   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2630   PetscFunctionReturn(PETSC_SUCCESS);
2631 }
2632 
2633 /*@
2634   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2635 
2636   Collective
2637 
2638   Input Parameters:
2639 + A  - the matrix
2640 - sc - `PETSC_TRUE` indicates that the scalable algorithm should be used (by default it is not used)
2641 
2642   Level: advanced
2643 
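  Options Database Key:
. -mat_increase_overlap_scalable - use a scalable algorithm to compute the overlap (processed by `MatSetFromOptions()`)

  Example Usage:
  A minimal sketch (it assumes `A` is a `MATMPIAIJ` matrix on which `MatIncreaseOverlap()` will later be called, for example by a domain decomposition preconditioner):
.vb
  PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
.ve
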
2644 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2645 @*/
2646 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2647 {
2648   PetscFunctionBegin;
2649   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2650   PetscFunctionReturn(PETSC_SUCCESS);
2651 }
2652 
2653 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2654 {
2655   PetscBool sc = PETSC_FALSE, flg;
2656 
2657   PetscFunctionBegin;
2658   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2659   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2660   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2661   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2662   PetscOptionsHeadEnd();
2663   PetscFunctionReturn(PETSC_SUCCESS);
2664 }
2665 
2666 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2667 {
2668   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2669   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2670 
2671   PetscFunctionBegin;
2672   if (!Y->preallocated) {
2673     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2674   } else if (!aij->nz) { /* It does not matter if the diagonal of Y only partially lies in maij->A. We just need an estimated preallocation. */
2675     PetscInt nonew = aij->nonew;
2676     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2677     aij->nonew = nonew;
2678   }
2679   PetscCall(MatShift_Basic(Y, a));
2680   PetscFunctionReturn(PETSC_SUCCESS);
2681 }
2682 
2683 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2684 {
2685   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2686 
2687   PetscFunctionBegin;
2688   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2689   PetscCall(MatMissingDiagonal(a->A, missing, d));
2690   if (d) {
2691     PetscInt rstart;
2692     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2693     *d += rstart;
2694   }
2695   PetscFunctionReturn(PETSC_SUCCESS);
2696 }
2697 
2698 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2699 {
2700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2701 
2702   PetscFunctionBegin;
2703   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2704   PetscFunctionReturn(PETSC_SUCCESS);
2705 }
2706 
2707 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2708 {
2709   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2710 
2711   PetscFunctionBegin;
2712   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2713   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2714   PetscFunctionReturn(PETSC_SUCCESS);
2715 }
2716 
2717 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2718                                        MatGetRow_MPIAIJ,
2719                                        MatRestoreRow_MPIAIJ,
2720                                        MatMult_MPIAIJ,
2721                                        /* 4*/ MatMultAdd_MPIAIJ,
2722                                        MatMultTranspose_MPIAIJ,
2723                                        MatMultTransposeAdd_MPIAIJ,
2724                                        NULL,
2725                                        NULL,
2726                                        NULL,
2727                                        /*10*/ NULL,
2728                                        NULL,
2729                                        NULL,
2730                                        MatSOR_MPIAIJ,
2731                                        MatTranspose_MPIAIJ,
2732                                        /*15*/ MatGetInfo_MPIAIJ,
2733                                        MatEqual_MPIAIJ,
2734                                        MatGetDiagonal_MPIAIJ,
2735                                        MatDiagonalScale_MPIAIJ,
2736                                        MatNorm_MPIAIJ,
2737                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2738                                        MatAssemblyEnd_MPIAIJ,
2739                                        MatSetOption_MPIAIJ,
2740                                        MatZeroEntries_MPIAIJ,
2741                                        /*24*/ MatZeroRows_MPIAIJ,
2742                                        NULL,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        /*29*/ MatSetUp_MPI_Hash,
2747                                        NULL,
2748                                        NULL,
2749                                        MatGetDiagonalBlock_MPIAIJ,
2750                                        NULL,
2751                                        /*34*/ MatDuplicate_MPIAIJ,
2752                                        NULL,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                        /*39*/ MatAXPY_MPIAIJ,
2757                                        MatCreateSubMatrices_MPIAIJ,
2758                                        MatIncreaseOverlap_MPIAIJ,
2759                                        MatGetValues_MPIAIJ,
2760                                        MatCopy_MPIAIJ,
2761                                        /*44*/ MatGetRowMax_MPIAIJ,
2762                                        MatScale_MPIAIJ,
2763                                        MatShift_MPIAIJ,
2764                                        MatDiagonalSet_MPIAIJ,
2765                                        MatZeroRowsColumns_MPIAIJ,
2766                                        /*49*/ MatSetRandom_MPIAIJ,
2767                                        MatGetRowIJ_MPIAIJ,
2768                                        MatRestoreRowIJ_MPIAIJ,
2769                                        NULL,
2770                                        NULL,
2771                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2772                                        NULL,
2773                                        MatSetUnfactored_MPIAIJ,
2774                                        MatPermute_MPIAIJ,
2775                                        NULL,
2776                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2777                                        MatDestroy_MPIAIJ,
2778                                        MatView_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        /*64*/ NULL,
2782                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2783                                        NULL,
2784                                        NULL,
2785                                        NULL,
2786                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2787                                        MatGetRowMinAbs_MPIAIJ,
2788                                        NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        NULL,
2792                                        /*75*/ MatFDColoringApply_AIJ,
2793                                        MatSetFromOptions_MPIAIJ,
2794                                        NULL,
2795                                        NULL,
2796                                        MatFindZeroDiagonals_MPIAIJ,
2797                                        /*80*/ NULL,
2798                                        NULL,
2799                                        NULL,
2800                                        /*83*/ MatLoad_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        /*89*/ NULL,
2807                                        NULL,
2808                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2809                                        NULL,
2810                                        NULL,
2811                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2812                                        NULL,
2813                                        NULL,
2814                                        NULL,
2815                                        MatBindToCPU_MPIAIJ,
2816                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2817                                        NULL,
2818                                        NULL,
2819                                        MatConjugate_MPIAIJ,
2820                                        NULL,
2821                                        /*104*/ MatSetValuesRow_MPIAIJ,
2822                                        MatRealPart_MPIAIJ,
2823                                        MatImaginaryPart_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        /*109*/ NULL,
2827                                        NULL,
2828                                        MatGetRowMin_MPIAIJ,
2829                                        NULL,
2830                                        MatMissingDiagonal_MPIAIJ,
2831                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2832                                        NULL,
2833                                        MatGetGhosts_MPIAIJ,
2834                                        NULL,
2835                                        NULL,
2836                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2837                                        NULL,
2838                                        NULL,
2839                                        NULL,
2840                                        MatGetMultiProcBlock_MPIAIJ,
2841                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2842                                        MatGetColumnReductions_MPIAIJ,
2843                                        MatInvertBlockDiagonal_MPIAIJ,
2844                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2845                                        MatCreateSubMatricesMPI_MPIAIJ,
2846                                        /*129*/ NULL,
2847                                        NULL,
2848                                        NULL,
2849                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2850                                        NULL,
2851                                        /*134*/ NULL,
2852                                        NULL,
2853                                        NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2857                                        NULL,
2858                                        NULL,
2859                                        MatFDColoringSetUp_MPIXAIJ,
2860                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2861                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2862                                        /*145*/ NULL,
2863                                        NULL,
2864                                        NULL,
2865                                        MatCreateGraph_Simple_AIJ,
2866                                        NULL,
2867                                        /*150*/ NULL,
2868                                        MatEliminateZeros_MPIAIJ,
2869                                        MatGetRowSumAbs_MPIAIJ,
2870                                        NULL,
2871                                        NULL,
2872                                        NULL};
2873 
2874 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2875 {
2876   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2877 
2878   PetscFunctionBegin;
2879   PetscCall(MatStoreValues(aij->A));
2880   PetscCall(MatStoreValues(aij->B));
2881   PetscFunctionReturn(PETSC_SUCCESS);
2882 }
2883 
2884 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2885 {
2886   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2887 
2888   PetscFunctionBegin;
2889   PetscCall(MatRetrieveValues(aij->A));
2890   PetscCall(MatRetrieveValues(aij->B));
2891   PetscFunctionReturn(PETSC_SUCCESS);
2892 }
2893 
2894 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2895 {
2896   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2897   PetscMPIInt size;
2898 
2899   PetscFunctionBegin;
2900   if (B->hash_active) {
2901     B->ops[0]      = b->cops;
2902     B->hash_active = PETSC_FALSE;
2903   }
2904   PetscCall(PetscLayoutSetUp(B->rmap));
2905   PetscCall(PetscLayoutSetUp(B->cmap));
2906 
2907 #if defined(PETSC_USE_CTABLE)
2908   PetscCall(PetscHMapIDestroy(&b->colmap));
2909 #else
2910   PetscCall(PetscFree(b->colmap));
2911 #endif
2912   PetscCall(PetscFree(b->garray));
2913   PetscCall(VecDestroy(&b->lvec));
2914   PetscCall(VecScatterDestroy(&b->Mvctx));
2915 
2916   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2917 
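  /* Recreate the off-diagonal block b->B: on more than one rank it is given the full global column count (unused columns are
     compressed out later when the matrix is assembled); on a single rank it has no columns */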
2918   MatSeqXAIJGetOptions_Private(b->B);
2919   PetscCall(MatDestroy(&b->B));
2920   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2921   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2922   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2923   PetscCall(MatSetType(b->B, MATSEQAIJ));
2924   MatSeqXAIJRestoreOptions_Private(b->B);
2925 
2926   MatSeqXAIJGetOptions_Private(b->A);
2927   PetscCall(MatDestroy(&b->A));
2928   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2929   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2930   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2931   PetscCall(MatSetType(b->A, MATSEQAIJ));
2932   MatSeqXAIJRestoreOptions_Private(b->A);
2933 
2934   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2935   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2936   B->preallocated  = PETSC_TRUE;
2937   B->was_assembled = PETSC_FALSE;
2938   B->assembled     = PETSC_FALSE;
2939   PetscFunctionReturn(PETSC_SUCCESS);
2940 }
2941 
2942 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2943 {
2944   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2945 
2946   PetscFunctionBegin;
2947   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2948   PetscCall(PetscLayoutSetUp(B->rmap));
2949   PetscCall(PetscLayoutSetUp(B->cmap));
2950 
2951 #if defined(PETSC_USE_CTABLE)
2952   PetscCall(PetscHMapIDestroy(&b->colmap));
2953 #else
2954   PetscCall(PetscFree(b->colmap));
2955 #endif
2956   PetscCall(PetscFree(b->garray));
2957   PetscCall(VecDestroy(&b->lvec));
2958   PetscCall(VecScatterDestroy(&b->Mvctx));
2959 
2960   PetscCall(MatResetPreallocation(b->A));
2961   PetscCall(MatResetPreallocation(b->B));
2962   B->preallocated  = PETSC_TRUE;
2963   B->was_assembled = PETSC_FALSE;
2964   B->assembled     = PETSC_FALSE;
2965   PetscFunctionReturn(PETSC_SUCCESS);
2966 }
2967 
2968 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2969 {
2970   Mat         mat;
2971   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2972 
2973   PetscFunctionBegin;
2974   *newmat = NULL;
2975   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2976   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2977   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2978   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2979   a = (Mat_MPIAIJ *)mat->data;
2980 
2981   mat->factortype = matin->factortype;
2982   mat->assembled  = matin->assembled;
2983   mat->insertmode = NOT_SET_VALUES;
2984 
2985   a->size         = oldmat->size;
2986   a->rank         = oldmat->rank;
2987   a->donotstash   = oldmat->donotstash;
2988   a->roworiented  = oldmat->roworiented;
2989   a->rowindices   = NULL;
2990   a->rowvalues    = NULL;
2991   a->getrowactive = PETSC_FALSE;
2992 
2993   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2994   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2995   if (matin->hash_active) {
2996     PetscCall(MatSetUp(mat));
2997   } else {
2998     mat->preallocated = matin->preallocated;
2999     if (oldmat->colmap) {
3000 #if defined(PETSC_USE_CTABLE)
3001       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3002 #else
3003       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3004       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3005 #endif
3006     } else a->colmap = NULL;
3007     if (oldmat->garray) {
3008       PetscInt len;
3009       len = oldmat->B->cmap->n;
3010       PetscCall(PetscMalloc1(len + 1, &a->garray));
3011       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3012     } else a->garray = NULL;
3013 
3014     /* It may happen that MatDuplicate() is called with a non-assembled matrix;
3015        in fact, MatDuplicate() only requires the matrix to be preallocated.
3016        This may happen inside a DMCreateMatrix_Shell() */
3017     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3018     if (oldmat->Mvctx) {
3019       a->Mvctx = oldmat->Mvctx;
3020       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3021     }
3022     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3023     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3024   }
3025   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3026   *newmat = mat;
3027   PetscFunctionReturn(PETSC_SUCCESS);
3028 }
3029 
3030 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3031 {
3032   PetscBool isbinary, ishdf5;
3033 
3034   PetscFunctionBegin;
3035   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3036   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3037   /* force binary viewer to load .info file if it has not yet done so */
3038   PetscCall(PetscViewerSetUp(viewer));
3039   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3040   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3041   if (isbinary) {
3042     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3043   } else if (ishdf5) {
3044 #if defined(PETSC_HAVE_HDF5)
3045     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3046 #else
3047     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3048 #endif
3049   } else {
3050     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3051   }
3052   PetscFunctionReturn(PETSC_SUCCESS);
3053 }
3054 
3055 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3056 {
3057   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3058   PetscInt    *rowidxs, *colidxs;
3059   PetscScalar *matvals;
3060 
3061   PetscFunctionBegin;
3062   PetscCall(PetscViewerSetUp(viewer));
3063 
3064   /* read in matrix header */
3065   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3066   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3067   M  = header[1];
3068   N  = header[2];
3069   nz = header[3];
3070   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3071   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3072   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3073 
3074   /* set block sizes from the viewer's .info file */
3075   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3076   /* set global sizes if not set already */
3077   if (mat->rmap->N < 0) mat->rmap->N = M;
3078   if (mat->cmap->N < 0) mat->cmap->N = N;
3079   PetscCall(PetscLayoutSetUp(mat->rmap));
3080   PetscCall(PetscLayoutSetUp(mat->cmap));
3081 
3082   /* check if the matrix sizes are correct */
3083   PetscCall(MatGetSize(mat, &rows, &cols));
3084   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3085 
3086   /* read in row lengths and build row indices */
3087   PetscCall(MatGetLocalSize(mat, &m, NULL));
3088   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3089   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3090   rowidxs[0] = 0;
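  /* rowidxs[i + 1] currently holds the number of nonzeros in local row i; the running sum below converts rowidxs[] into CSR row offsets */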
3091   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3092   if (nz != PETSC_INT_MAX) {
3093     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3094     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3095   }
3096 
3097   /* read in column indices and matrix values */
3098   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3099   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3100   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3101   /* store matrix indices and values */
3102   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3103   PetscCall(PetscFree(rowidxs));
3104   PetscCall(PetscFree2(colidxs, matvals));
3105   PetscFunctionReturn(PETSC_SUCCESS);
3106 }
3107 
3108 /* Not scalable because of ISAllGather() unless getting all columns. */
3109 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3110 {
3111   IS          iscol_local;
3112   PetscBool   isstride;
3113   PetscMPIInt lisstride = 0, gisstride;
3114 
3115   PetscFunctionBegin;
3116   /* check if we are grabbing all columns */
3117   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3118 
3119   if (isstride) {
3120     PetscInt start, len, mstart, mlen;
3121     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3122     PetscCall(ISGetLocalSize(iscol, &len));
3123     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3124     if (mstart == start && mlen - mstart == len) lisstride = 1;
3125   }
3126 
3127   PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3128   if (gisstride) {
3129     PetscInt N;
3130     PetscCall(MatGetSize(mat, NULL, &N));
3131     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3132     PetscCall(ISSetIdentity(iscol_local));
3133     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3134   } else {
3135     PetscInt cbs;
3136     PetscCall(ISGetBlockSize(iscol, &cbs));
3137     PetscCall(ISAllGather(iscol, &iscol_local));
3138     PetscCall(ISSetBlockSize(iscol_local, cbs));
3139   }
3140 
3141   *isseq = iscol_local;
3142   PetscFunctionReturn(PETSC_SUCCESS);
3143 }
3144 
3145 /*
3146  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3147  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3148 
3149  Input Parameters:
3150 +   mat - matrix
3151 +   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3152            i.e., mat->rstart <= isrow[i] < mat->rend
3153 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3154            i.e., mat->cstart <= iscol[i] < mat->cend
3155 
3156  Output Parameters:
3157 +   isrow_d - sequential row index set for retrieving mat->A
3158 .   iscol_d - sequential column index set for retrieving mat->A
3159 .   iscol_o - sequential column index set for retrieving mat->B
3160 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3161  */
3162 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3163 {
3164   Vec             x, cmap;
3165   const PetscInt *is_idx;
3166   PetscScalar    *xarray, *cmaparray;
3167   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3168   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3169   Mat             B    = a->B;
3170   Vec             lvec = a->lvec, lcmap;
3171   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3172   MPI_Comm        comm;
3173   VecScatter      Mvctx = a->Mvctx;
3174 
3175   PetscFunctionBegin;
3176   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3177   PetscCall(ISGetLocalSize(iscol, &ncols));
3178 
3179   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3180   PetscCall(MatCreateVecs(mat, &x, NULL));
3181   PetscCall(VecSet(x, -1.0));
3182   PetscCall(VecDuplicate(x, &cmap));
3183   PetscCall(VecSet(cmap, -1.0));
3184 
3185   /* Get start indices */
3186   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3187   isstart -= ncols;
3188   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3189 
3190   PetscCall(ISGetIndices(iscol, &is_idx));
3191   PetscCall(VecGetArray(x, &xarray));
3192   PetscCall(VecGetArray(cmap, &cmaparray));
3193   PetscCall(PetscMalloc1(ncols, &idx));
3194   for (i = 0; i < ncols; i++) {
3195     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3196     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3197     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3198   }
3199   PetscCall(VecRestoreArray(x, &xarray));
3200   PetscCall(VecRestoreArray(cmap, &cmaparray));
3201   PetscCall(ISRestoreIndices(iscol, &is_idx));
3202 
3203   /* Get iscol_d */
3204   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3205   PetscCall(ISGetBlockSize(iscol, &i));
3206   PetscCall(ISSetBlockSize(*iscol_d, i));
3207 
3208   /* Get isrow_d */
3209   PetscCall(ISGetLocalSize(isrow, &m));
3210   rstart = mat->rmap->rstart;
3211   PetscCall(PetscMalloc1(m, &idx));
3212   PetscCall(ISGetIndices(isrow, &is_idx));
3213   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3214   PetscCall(ISRestoreIndices(isrow, &is_idx));
3215 
3216   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3217   PetscCall(ISGetBlockSize(isrow, &i));
3218   PetscCall(ISSetBlockSize(*isrow_d, i));
3219 
3220   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3221   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3222   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3223 
3224   PetscCall(VecDuplicate(lvec, &lcmap));
3225 
3226   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3227   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3228 
3229   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3230   /* off-process column indices */
3231   count = 0;
3232   PetscCall(PetscMalloc1(Bn, &idx));
3233   PetscCall(PetscMalloc1(Bn, &cmap1));
3234 
3235   PetscCall(VecGetArray(lvec, &xarray));
3236   PetscCall(VecGetArray(lcmap, &cmaparray));
3237   for (i = 0; i < Bn; i++) {
3238     if (PetscRealPart(xarray[i]) > -1.0) {
3239       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3240       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3241       count++;
3242     }
3243   }
3244   PetscCall(VecRestoreArray(lvec, &xarray));
3245   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3246 
3247   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3248   /* cannot ensure iscol_o has same blocksize as iscol! */
3249 
3250   PetscCall(PetscFree(idx));
3251   *garray = cmap1;
3252 
3253   PetscCall(VecDestroy(&x));
3254   PetscCall(VecDestroy(&cmap));
3255   PetscCall(VecDestroy(&lcmap));
3256   PetscFunctionReturn(PETSC_SUCCESS);
3257 }
3258 
3259 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3260 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3261 {
3262   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3263   Mat         M = NULL;
3264   MPI_Comm    comm;
3265   IS          iscol_d, isrow_d, iscol_o;
3266   Mat         Asub = NULL, Bsub = NULL;
3267   PetscInt    n;
3268 
3269   PetscFunctionBegin;
3270   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3271 
3272   if (call == MAT_REUSE_MATRIX) {
3273     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3274     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3275     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3276 
3277     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3278     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3279 
3280     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3281     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3282 
3283     /* Update diagonal and off-diagonal portions of submat */
3284     asub = (Mat_MPIAIJ *)(*submat)->data;
3285     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3286     PetscCall(ISGetLocalSize(iscol_o, &n));
3287     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3288     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3289     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3290 
3291   } else { /* call == MAT_INITIAL_MATRIX */
3292     const PetscInt *garray;
3293     PetscInt        BsubN;
3294 
3295     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3296     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3297 
3298     /* Create local submatrices Asub and Bsub */
3299     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3300     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3301 
3302     /* Create submatrix M */
3303     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3304 
3305     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3306     asub = (Mat_MPIAIJ *)M->data;
3307 
3308     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3309     n = asub->B->cmap->N;
3310     if (BsubN > n) {
3311       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3312       const PetscInt *idx;
3313       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3314       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3315 
3316       PetscCall(PetscMalloc1(n, &idx_new));
3317       j = 0;
3318       PetscCall(ISGetIndices(iscol_o, &idx));
3319       for (i = 0; i < n; i++) {
3320         if (j >= BsubN) break;
3321         while (subgarray[i] > garray[j]) j++;
3322 
3323         if (subgarray[i] == garray[j]) {
3324           idx_new[i] = idx[j++];
3325         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3326       }
3327       PetscCall(ISRestoreIndices(iscol_o, &idx));
3328 
3329       PetscCall(ISDestroy(&iscol_o));
3330       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3331 
3332     } else if (BsubN < n) {
3333       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3334     }
3335 
3336     PetscCall(PetscFree(garray));
3337     *submat = M;
3338 
3339     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3340     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3341     PetscCall(ISDestroy(&isrow_d));
3342 
3343     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3344     PetscCall(ISDestroy(&iscol_d));
3345 
3346     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3347     PetscCall(ISDestroy(&iscol_o));
3348   }
3349   PetscFunctionReturn(PETSC_SUCCESS);
3350 }
3351 
3352 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3353 {
3354   IS        iscol_local = NULL, isrow_d;
3355   PetscInt  csize;
3356   PetscInt  n, i, j, start, end;
3357   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3358   MPI_Comm  comm;
3359 
3360   PetscFunctionBegin;
3361   /* If isrow has same processor distribution as mat,
3362      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3363   if (call == MAT_REUSE_MATRIX) {
3364     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3365     if (isrow_d) {
3366       sameRowDist  = PETSC_TRUE;
3367       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3368     } else {
3369       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3370       if (iscol_local) {
3371         sameRowDist  = PETSC_TRUE;
3372         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3373       }
3374     }
3375   } else {
3376     /* Check if isrow has same processor distribution as mat */
3377     sameDist[0] = PETSC_FALSE;
3378     PetscCall(ISGetLocalSize(isrow, &n));
3379     if (!n) {
3380       sameDist[0] = PETSC_TRUE;
3381     } else {
3382       PetscCall(ISGetMinMax(isrow, &i, &j));
3383       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3384       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3385     }
3386 
3387     /* Check if iscol has same processor distribution as mat */
3388     sameDist[1] = PETSC_FALSE;
3389     PetscCall(ISGetLocalSize(iscol, &n));
3390     if (!n) {
3391       sameDist[1] = PETSC_TRUE;
3392     } else {
3393       PetscCall(ISGetMinMax(iscol, &i, &j));
3394       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3395       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3396     }
3397 
3398     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3399     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3400     sameRowDist = tsameDist[0];
3401   }
3402 
3403   if (sameRowDist) {
3404     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3405       /* isrow and iscol have same processor distribution as mat */
3406       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3407       PetscFunctionReturn(PETSC_SUCCESS);
3408     } else { /* sameRowDist */
3409       /* isrow has same processor distribution as mat */
3410       if (call == MAT_INITIAL_MATRIX) {
3411         PetscBool sorted;
3412         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3413         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3414         PetscCall(ISGetSize(iscol, &i));
3415         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3416 
3417         PetscCall(ISSorted(iscol_local, &sorted));
3418         if (sorted) {
3419           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3420           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3421           PetscFunctionReturn(PETSC_SUCCESS);
3422         }
3423       } else { /* call == MAT_REUSE_MATRIX */
3424         IS iscol_sub;
3425         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3426         if (iscol_sub) {
3427           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3428           PetscFunctionReturn(PETSC_SUCCESS);
3429         }
3430       }
3431     }
3432   }
3433 
3434   /* General case: iscol -> iscol_local which has global size of iscol */
3435   if (call == MAT_REUSE_MATRIX) {
3436     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3437     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3438   } else {
3439     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3440   }
3441 
3442   PetscCall(ISGetLocalSize(iscol, &csize));
3443   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3444 
3445   if (call == MAT_INITIAL_MATRIX) {
3446     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3447     PetscCall(ISDestroy(&iscol_local));
3448   }
3449   PetscFunctionReturn(PETSC_SUCCESS);
3450 }
3451 
3452 /*@C
3453   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3454   and "off-diagonal" part of the matrix in CSR format.
3455 
3456   Collective
3457 
3458   Input Parameters:
3459 + comm   - MPI communicator
3460 . A      - "diagonal" portion of matrix
3461 . B      - "off-diagonal" portion of the matrix; it may have empty columns and will be destroyed by this routine
3462 - garray - global index of `B` columns
3463 
3464   Output Parameter:
3465 . mat - the matrix, with input `A` as its local diagonal matrix
3466 
3467   Level: advanced
3468 
3469   Notes:
3470   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3471 
3472   `A` becomes part of the output `mat`, while `B` is destroyed by this routine. The caller may not use `A` or `B` afterwards.
3473 
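  Example Usage:
  A sketch of the calling pattern; `Aloc`, `Bloc`, and `garr` are placeholder names for a previously built "diagonal" `MATSEQAIJ`, a matching "off-diagonal" `MATSEQAIJ`, and the global indices of its columns:
.vb
  Mat C;

  PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, Aloc, Bloc, garr, &C));
  /* Aloc and Bloc now belong to C; do not use or destroy them afterwards */
.ve
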
3474 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3475 @*/
3476 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3477 {
3478   Mat_MPIAIJ        *maij;
3479   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3480   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3481   const PetscScalar *oa;
3482   Mat                Bnew;
3483   PetscInt           m, n, N;
3484   MatType            mpi_mat_type;
3485 
3486   PetscFunctionBegin;
3487   PetscCall(MatCreate(comm, mat));
3488   PetscCall(MatGetSize(A, &m, &n));
3489   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3490   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3491   /* the check below was removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3492   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3493 
3494   /* Get global columns of mat */
3495   PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3496 
3497   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3498   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3499   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3500   PetscCall(MatSetType(*mat, mpi_mat_type));
3501 
3502   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3503   maij = (Mat_MPIAIJ *)(*mat)->data;
3504 
3505   (*mat)->preallocated = PETSC_TRUE;
3506 
3507   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3508   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3509 
3510   /* Set A as diagonal portion of *mat */
3511   maij->A = A;
3512 
3513   nz = oi[m];
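  /* Translate B's column indices from its local (compressed) numbering to global numbering in place, using garray */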
3514   for (i = 0; i < nz; i++) {
3515     col   = oj[i];
3516     oj[i] = garray[col];
3517   }
3518 
3519   /* Set Bnew as off-diagonal portion of *mat */
3520   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3521   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3522   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3523   bnew        = (Mat_SeqAIJ *)Bnew->data;
3524   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3525   maij->B     = Bnew;
3526 
3527   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3528 
3529   b->free_a  = PETSC_FALSE;
3530   b->free_ij = PETSC_FALSE;
3531   PetscCall(MatDestroy(&B));
3532 
3533   bnew->free_a  = PETSC_TRUE;
3534   bnew->free_ij = PETSC_TRUE;
3535 
3536   /* condense columns of maij->B */
3537   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3538   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3539   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3540   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3541   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3542   PetscFunctionReturn(PETSC_SUCCESS);
3543 }
3544 
3545 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3546 
3547 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3548 {
3549   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3550   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3551   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3552   Mat             M, Msub, B = a->B;
3553   MatScalar      *aa;
3554   Mat_SeqAIJ     *aij;
3555   PetscInt       *garray = a->garray, *colsub, Ncols;
3556   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3557   IS              iscol_sub, iscmap;
3558   const PetscInt *is_idx, *cmap;
3559   PetscBool       allcolumns = PETSC_FALSE;
3560   MPI_Comm        comm;
3561 
3562   PetscFunctionBegin;
3563   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3564   if (call == MAT_REUSE_MATRIX) {
3565     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3566     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3567     PetscCall(ISGetLocalSize(iscol_sub, &count));
3568 
3569     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3570     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3571 
3572     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3573     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3574 
3575     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3576 
3577   } else { /* call == MAT_INITIAL_MATRIX */
3578     PetscBool flg;
3579 
3580     PetscCall(ISGetLocalSize(iscol, &n));
3581     PetscCall(ISGetSize(iscol, &Ncols));
3582 
3583     /* (1) iscol -> nonscalable iscol_local */
3584     /* Check for special case: each processor gets entire matrix columns */
3585     PetscCall(ISIdentity(iscol_local, &flg));
3586     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3587     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3588     if (allcolumns) {
3589       iscol_sub = iscol_local;
3590       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3591       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3592 
3593     } else {
3594       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3595       PetscInt *idx, *cmap1, k;
3596       PetscCall(PetscMalloc1(Ncols, &idx));
3597       PetscCall(PetscMalloc1(Ncols, &cmap1));
3598       PetscCall(ISGetIndices(iscol_local, &is_idx));
3599       count = 0;
3600       k     = 0;
3601       for (i = 0; i < Ncols; i++) {
3602         j = is_idx[i];
3603         if (j >= cstart && j < cend) {
3604           /* diagonal part of mat */
3605           idx[count]     = j;
3606           cmap1[count++] = i; /* column index in submat */
3607         } else if (Bn) {
3608           /* off-diagonal part of mat */
3609           if (j == garray[k]) {
3610             idx[count]     = j;
3611             cmap1[count++] = i; /* column index in submat */
3612           } else if (j > garray[k]) {
3613             while (j > garray[k] && k < Bn - 1) k++;
3614             if (j == garray[k]) {
3615               idx[count]     = j;
3616               cmap1[count++] = i; /* column index in submat */
3617             }
3618           }
3619         }
3620       }
3621       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3622 
3623       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3624       PetscCall(ISGetBlockSize(iscol, &cbs));
3625       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3626 
3627       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3628     }
3629 
3630     /* (3) Create sequential Msub */
3631     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3632   }
3633 
3634   PetscCall(ISGetLocalSize(iscol_sub, &count));
3635   aij = (Mat_SeqAIJ *)Msub->data;
3636   ii  = aij->i;
3637   PetscCall(ISGetIndices(iscmap, &cmap));
3638 
3639   /*
3640       m - number of local rows
3641       Ncols - number of columns (same on all processors)
3642       rstart - first row in new global matrix generated
3643   */
3644   PetscCall(MatGetSize(Msub, &m, NULL));
3645 
3646   if (call == MAT_INITIAL_MATRIX) {
3647     /* (4) Create parallel newmat */
3648     PetscMPIInt rank, size;
3649     PetscInt    csize;
3650 
3651     PetscCallMPI(MPI_Comm_size(comm, &size));
3652     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3653 
3654     /*
3655         Determine the number of non-zeros in the diagonal and off-diagonal
3656         portions of the matrix in order to do correct preallocation
3657     */
3658 
3659     /* first get start and end of "diagonal" columns */
3660     PetscCall(ISGetLocalSize(iscol, &csize));
3661     if (csize == PETSC_DECIDE) {
3662       PetscCall(ISGetSize(isrow, &mglobal));
3663       if (mglobal == Ncols) { /* square matrix */
3664         nlocal = m;
3665       } else {
3666         nlocal = Ncols / size + ((Ncols % size) > rank);
3667       }
3668     } else {
3669       nlocal = csize;
3670     }
3671     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3672     rstart = rend - nlocal;
3673     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3674 
3675     /* next, compute all the lengths */
3676     jj = aij->j;
3677     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3678     olens = dlens + m;
3679     for (i = 0; i < m; i++) {
3680       jend = ii[i + 1] - ii[i];
3681       olen = 0;
3682       dlen = 0;
3683       for (j = 0; j < jend; j++) {
3684         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3685         else dlen++;
3686         jj++;
3687       }
3688       olens[i] = olen;
3689       dlens[i] = dlen;
3690     }
3691 
3692     PetscCall(ISGetBlockSize(isrow, &bs));
3693     PetscCall(ISGetBlockSize(iscol, &cbs));
3694 
3695     PetscCall(MatCreate(comm, &M));
3696     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3697     PetscCall(MatSetBlockSizes(M, bs, cbs));
3698     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3699     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3700     PetscCall(PetscFree(dlens));
3701 
3702   } else { /* call == MAT_REUSE_MATRIX */
3703     M = *newmat;
3704     PetscCall(MatGetLocalSize(M, &i, NULL));
3705     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3706     PetscCall(MatZeroEntries(M));
3707     /*
3708          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3709        rather than the slower MatSetValues().
3710     */
3711     M->was_assembled = PETSC_TRUE;
3712     M->assembled     = PETSC_FALSE;
3713   }
3714 
3715   /* (5) Set values of Msub to *newmat */
3716   PetscCall(PetscMalloc1(count, &colsub));
3717   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3718 
3719   jj = aij->j;
3720   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3721   for (i = 0; i < m; i++) {
3722     row = rstart + i;
3723     nz  = ii[i + 1] - ii[i];
3724     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3725     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3726     jj += nz;
3727     aa += nz;
3728   }
3729   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3730   PetscCall(ISRestoreIndices(iscmap, &cmap));
3731 
3732   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3733   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3734 
3735   PetscCall(PetscFree(colsub));
3736 
3737   /* save Msub, iscol_sub and iscmap used in processor for next request */
3738   if (call == MAT_INITIAL_MATRIX) {
3739     *newmat = M;
3740     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3741     PetscCall(MatDestroy(&Msub));
3742 
3743     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3744     PetscCall(ISDestroy(&iscol_sub));
3745 
3746     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3747     PetscCall(ISDestroy(&iscmap));
3748 
3749     if (iscol_local) {
3750       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3751       PetscCall(ISDestroy(&iscol_local));
3752     }
3753   }
3754   PetscFunctionReturn(PETSC_SUCCESS);
3755 }
3756 
3757 /*
3758     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3759   and then the end result obtained by concatenating the local matrices.
3760   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3761 
3762   This requires a sequential iscol with all indices.
3763 */
3764 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3765 {
3766   PetscMPIInt rank, size;
3767   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3768   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3769   Mat         M, Mreuse;
3770   MatScalar  *aa, *vwork;
3771   MPI_Comm    comm;
3772   Mat_SeqAIJ *aij;
3773   PetscBool   colflag, allcolumns = PETSC_FALSE;
3774 
3775   PetscFunctionBegin;
3776   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3777   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3778   PetscCallMPI(MPI_Comm_size(comm, &size));
3779 
3780   /* Check for special case: each processor gets entire matrix columns */
3781   PetscCall(ISIdentity(iscol, &colflag));
3782   PetscCall(ISGetLocalSize(iscol, &n));
3783   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3784   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3785 
3786   if (call == MAT_REUSE_MATRIX) {
3787     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3788     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3789     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3790   } else {
3791     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3792   }
3793 
3794   /*
3795       m - number of local rows
3796       n - number of columns (same on all processors)
3797       rstart - first row in new global matrix generated
3798   */
3799   PetscCall(MatGetSize(Mreuse, &m, &n));
3800   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3801   if (call == MAT_INITIAL_MATRIX) {
3802     aij = (Mat_SeqAIJ *)Mreuse->data;
3803     ii  = aij->i;
3804     jj  = aij->j;
3805 
3806     /*
3807         Determine the number of non-zeros in the diagonal and off-diagonal
3808         portions of the matrix in order to do correct preallocation
3809     */
3810 
3811     /* first get start and end of "diagonal" columns */
3812     if (csize == PETSC_DECIDE) {
3813       PetscCall(ISGetSize(isrow, &mglobal));
3814       if (mglobal == n) { /* square matrix */
3815         nlocal = m;
3816       } else {
3817         nlocal = n / size + ((n % size) > rank);
3818       }
3819     } else {
3820       nlocal = csize;
3821     }
3822     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3823     rstart = rend - nlocal;
3824     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3825 
3826     /* next, compute all the lengths */
3827     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3828     olens = dlens + m;
3829     for (i = 0; i < m; i++) {
3830       jend = ii[i + 1] - ii[i];
3831       olen = 0;
3832       dlen = 0;
3833       for (j = 0; j < jend; j++) {
3834         if (*jj < rstart || *jj >= rend) olen++;
3835         else dlen++;
3836         jj++;
3837       }
3838       olens[i] = olen;
3839       dlens[i] = dlen;
3840     }
3841     PetscCall(MatCreate(comm, &M));
3842     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3843     PetscCall(MatSetBlockSizes(M, bs, cbs));
3844     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3845     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3846     PetscCall(PetscFree(dlens));
3847   } else {
3848     PetscInt ml, nl;
3849 
3850     M = *newmat;
3851     PetscCall(MatGetLocalSize(M, &ml, &nl));
3852     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3853     PetscCall(MatZeroEntries(M));
3854     /*
3855          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3856        rather than the slower MatSetValues().
3857     */
3858     M->was_assembled = PETSC_TRUE;
3859     M->assembled     = PETSC_FALSE;
3860   }
3861   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3862   aij = (Mat_SeqAIJ *)Mreuse->data;
3863   ii  = aij->i;
3864   jj  = aij->j;
3865 
3866   /* trigger copy to CPU if needed */
3867   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3868   for (i = 0; i < m; i++) {
3869     row   = rstart + i;
3870     nz    = ii[i + 1] - ii[i];
3871     cwork = jj;
3872     jj    = PetscSafePointerPlusOffset(jj, nz);
3873     vwork = aa;
3874     aa    = PetscSafePointerPlusOffset(aa, nz);
3875     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3876   }
3877   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3878 
3879   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3880   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3881   *newmat = M;
3882 
3883   /* save submatrix used in processor for next request */
3884   if (call == MAT_INITIAL_MATRIX) {
3885     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3886     PetscCall(MatDestroy(&Mreuse));
3887   }
3888   PetscFunctionReturn(PETSC_SUCCESS);
3889 }
3890 
3891 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3892 {
3893   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3894   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3895   const PetscInt *JJ;
3896   PetscBool       nooffprocentries;
3897   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3898 
3899   PetscFunctionBegin;
3900   PetscCall(PetscLayoutSetUp(B->rmap));
3901   PetscCall(PetscLayoutSetUp(B->cmap));
3902   m       = B->rmap->n;
3903   cstart  = B->cmap->rstart;
3904   cend    = B->cmap->rend;
3905   rstart  = B->rmap->rstart;
3906   irstart = Ii[0];
3907 
3908   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3909 
3910   if (PetscDefined(USE_DEBUG)) {
3911     for (i = 0; i < m; i++) {
3912       nnz = Ii[i + 1] - Ii[i];
3913       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3914       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3915       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3916       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3917     }
3918   }
3919 
3920   for (i = 0; i < m; i++) {
3921     nnz     = Ii[i + 1] - Ii[i];
3922     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3923     nnz_max = PetscMax(nnz_max, nnz);
3924     d       = 0;
3925     for (j = 0; j < nnz; j++) {
3926       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3927     }
3928     d_nnz[i] = d;
3929     o_nnz[i] = nnz - d;
3930   }
3931   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3932   PetscCall(PetscFree2(d_nnz, o_nnz));
3933 
3934   for (i = 0; i < m; i++) {
3935     ii = i + rstart;
3936     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3937   }
3938   nooffprocentries    = B->nooffprocentries;
3939   B->nooffprocentries = PETSC_TRUE;
3940   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3941   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3942   B->nooffprocentries = nooffprocentries;
3943 
3944   /* count number of entries below block diagonal */
3945   PetscCall(PetscFree(Aij->ld));
3946   PetscCall(PetscCalloc1(m, &ld));
3947   Aij->ld = ld;
3948   for (i = 0; i < m; i++) {
3949     nnz = Ii[i + 1] - Ii[i];
3950     j   = 0;
3951     while (j < nnz && J[j] < cstart) j++;
3952     ld[i] = j;
3953     if (J) J += nnz;
3954   }
3955 
3956   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3957   PetscFunctionReturn(PETSC_SUCCESS);
3958 }
3959 
3960 /*@
3961   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3962   (the default parallel PETSc format).
3963 
3964   Collective
3965 
3966   Input Parameters:
3967 + B - the matrix
3968 . i - the indices into `j` for the start of each local row (indices start with zero)
3969 . j - the column indices for each local row (indices start with zero)
3970 - v - optional values in the matrix
3971 
3972   Level: developer
3973 
3974   Notes:
3975   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3976   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3977   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3978 
3979   The `i` and `j` indices are 0 based, and the values in `i` are offsets into the local `j` array.
3980 
3981   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3982 
3983   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3984 
3985   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3986   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3987 
3988   The format used for the sparse matrix input is equivalent to a
3989   row-major ordering, i.e., for the following matrix the expected input data is
3990   as shown
3991 .vb
3992         1 0 0
3993         2 0 3     P0
3994        -------
3995         4 5 6     P1
3996 
3997      Process0 [P0] rows_owned=[0,1]
3998         i =  {0,1,3}  [size = nrow+1  = 2+1]
3999         j =  {0,0,2}  [size = 3]
4000         v =  {1,2,3}  [size = 3]
4001 
4002      Process1 [P1] rows_owned=[2]
4003         i =  {0,3}    [size = nrow+1  = 1+1]
4004         j =  {0,1,2}  [size = 3]
4005         v =  {4,5,6}  [size = 3]
4006 .ve
4007 
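  The calls corresponding to process 0 [P0] in the example above, as a minimal sketch (error checking with `PetscCall()` omitted), could look like
.vb
  Mat         B;
  PetscInt    i[] = {0, 1, 3};
  PetscInt    j[] = {0, 0, 2};
  PetscScalar v[] = {1.0, 2.0, 3.0};

  MatCreate(PETSC_COMM_WORLD, &B);
  MatSetSizes(B, 2, PETSC_DECIDE, 3, 3);    /* 2 local rows on this process, 3x3 globally */
  MatSetType(B, MATMPIAIJ);
  MatMPIAIJSetPreallocationCSR(B, i, j, v); /* preallocates and inserts the values */
.ve
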
4008 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4009           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4010 @*/
4011 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4012 {
4013   PetscFunctionBegin;
4014   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4015   PetscFunctionReturn(PETSC_SUCCESS);
4016 }
4017 
4018 /*@
4019   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4020   (the default parallel PETSc format).  For good matrix assembly performance
4021   the user should preallocate the matrix storage by setting the parameters
4022   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4023 
4024   Collective
4025 
4026   Input Parameters:
4027 + B     - the matrix
4028 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4029            (same value is used for all local rows)
4030 . d_nnz - array containing the number of nonzeros in the various rows of the
4031            DIAGONAL portion of the local submatrix (possibly different for each row)
4032            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4033            The size of this array is equal to the number of local rows, i.e., 'm'.
4034            For matrices that will be factored, you must leave room for (and set)
4035            the diagonal entry even if it is zero.
4036 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4037            submatrix (same value is used for all local rows).
4038 - o_nnz - array containing the number of nonzeros in the various rows of the
4039            OFF-DIAGONAL portion of the local submatrix (possibly different for
4040            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4041            structure. The size of this array is equal to the number
4042            of local rows, i.e., 'm'.
4043 
4044   Example Usage:
4045   Consider the following 8x8 matrix with 34 non-zero values, that is
4046   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4047   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4048   as follows
4049 
4050 .vb
4051             1  2  0  |  0  3  0  |  0  4
4052     Proc0   0  5  6  |  7  0  0  |  8  0
4053             9  0 10  | 11  0  0  | 12  0
4054     -------------------------------------
4055            13  0 14  | 15 16 17  |  0  0
4056     Proc1   0 18  0  | 19 20 21  |  0  0
4057             0  0  0  | 22 23  0  | 24  0
4058     -------------------------------------
4059     Proc2  25 26 27  |  0  0 28  | 29  0
4060            30  0  0  | 31 32 33  |  0 34
4061 .ve
4062 
4063   This can be represented as a collection of submatrices as
4064 .vb
4065       A B C
4066       D E F
4067       G H I
4068 .ve
4069 
4070   Where the submatrices A,B,C are owned by proc0, D,E,F are
4071   owned by proc1, G,H,I are owned by proc2.
4072 
4073   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4074   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4075   The 'M','N' parameters are 8,8, and have the same values on all procs.
4076 
4077   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4078   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4079   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4080   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4081   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4082   matrix, and [DF] as another `MATSEQAIJ` matrix.
4083 
4084   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4085   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4086   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4087   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4088   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4089   In this case, the values of `d_nz`, `o_nz` are
4090 .vb
4091      proc0  d_nz = 2, o_nz = 2
4092      proc1  d_nz = 3, o_nz = 2
4093      proc2  d_nz = 1, o_nz = 4
4094 .ve
4095   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4096   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4097   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4098   34 values.
4099 
4100   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4101   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4102   In the above case the values for `d_nnz`, `o_nnz` are
4103 .vb
4104      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4105      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4106      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4107 .ve
4108   Here the space allocated is the sum of all the above values, i.e., 34, and
4109   hence pre-allocation is perfect.
4110 
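  A minimal sketch of the corresponding calls on proc1, using the `d_nnz`/`o_nnz` values from the example above (insertion of the actual values not shown; error checking omitted), could be
.vb
  Mat      A;
  PetscInt d_nnz[] = {3, 3, 2};   /* nonzeros per local row in the DIAGONAL block */
  PetscInt o_nnz[] = {2, 1, 1};   /* nonzeros per local row in the OFF-DIAGONAL block */

  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, 3, 3, 8, 8);     /* proc1 owns 3 rows and 3 columns of the 8x8 matrix */
  MatSetType(A, MATMPIAIJ);
  MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz);
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
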
4111   Level: intermediate
4112 
4113   Notes:
4114   If the *_nnz parameter is given then the *_nz parameter is ignored
4115 
4116   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4117   storage.  The stored row and column indices begin with zero.
4118   See [Sparse Matrices](sec_matsparse) for details.
4119 
4120   The parallel matrix is partitioned such that the first m0 rows belong to
4121   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4122   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4123 
4124   The DIAGONAL portion of the local submatrix of a processor can be defined
4125   as the submatrix which is obtained by extracting the part corresponding to
4126   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4127   first row that belongs to the processor, r2 is the last row belonging to
4128   this processor, and c1-c2 is the range of indices of the local part of a
4129   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4130   common case of a square matrix, the row and column ranges are the same and
4131   the DIAGONAL part is also square. The remaining portion of the local
4132   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4133 
4134   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4135 
4136   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4137   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4138   You can also run with the option `-info` and look for messages with the string
4139   malloc in them to see if additional memory allocation was needed.
4140 
4141 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4142           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4143 @*/
4144 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4145 {
4146   PetscFunctionBegin;
4147   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4148   PetscValidType(B, 1);
4149   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4150   PetscFunctionReturn(PETSC_SUCCESS);
4151 }
4152 
4153 /*@
4154   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4155   CSR format.
4156 
4157   Collective
4158 
4159   Input Parameters:
4160 + comm - MPI communicator
4161 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4162 . n    - This value should be the same as the local size used in creating the
4163          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
4164          calculated if `N` is given) For square matrices n is almost always `m`.
4165 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4166 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4167 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4168 . j    - global column indices
4169 - a    - optional matrix values
4170 
4171   Output Parameter:
4172 . mat - the matrix
4173 
4174   Level: intermediate
4175 
4176   Notes:
4177   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4178   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4179   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4180 
4181   The `i` and `j` indices are 0 based, and the values in `i` are offsets into the local `j` array.
4182 
4183   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4184 
4185   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4186   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4187 
4188   The format which is used for the sparse matrix input, is equivalent to a
4189   row-major ordering, i.e., for the following matrix, the input data expected is
4190   as shown
4191 .vb
4192         1 0 0
4193         2 0 3     P0
4194        -------
4195         4 5 6     P1
4196 
4197      Process0 [P0] rows_owned=[0,1]
4198         i =  {0,1,3}  [size = nrow+1  = 2+1]
4199         j =  {0,0,2}  [size = 3]
4200         v =  {1,2,3}  [size = 3]
4201 
4202      Process1 [P1] rows_owned=[2]
4203         i =  {0,3}    [size = nrow+1  = 1+1]
4204         j =  {0,1,2}  [size = 3]
4205         v =  {4,5,6}  [size = 3]
4206 .ve
4207 
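  A minimal sketch for process 0 [P0] of the example above (a two-process run; error checking omitted) could be
.vb
  Mat         A;
  PetscInt    i[] = {0, 1, 3};
  PetscInt    j[] = {0, 0, 2};
  PetscScalar v[] = {1.0, 2.0, 3.0};

  MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 2, PETSC_DECIDE, 3, 3, i, j, v, &A);
.ve
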
4208 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4209           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4210 @*/
4211 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4212 {
4213   PetscFunctionBegin;
4214   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4215   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4216   PetscCall(MatCreate(comm, mat));
4217   PetscCall(MatSetSizes(*mat, m, n, M, N));
4218   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4219   PetscCall(MatSetType(*mat, MATMPIAIJ));
4220   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4221   PetscFunctionReturn(PETSC_SUCCESS);
4222 }
4223 
4224 /*@
4225   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4226   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4227   to `MatCreateMPIAIJWithArrays()`
4228 
4229   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4230 
4231   Collective
4232 
4233   Input Parameters:
4234 + mat - the matrix
4235 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4236 . n   - This value should be the same as the local size used in creating the
4237        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4238        calculated if N is given) For square matrices n is almost always m.
4239 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4240 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4241 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4242 . J   - column indices
4243 - v   - matrix values
4244 
4245   Level: deprecated
4246 
4247 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4248           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4249 @*/
4250 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4251 {
4252   PetscInt        nnz, i;
4253   PetscBool       nooffprocentries;
4254   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4255   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4256   PetscScalar    *ad, *ao;
4257   PetscInt        ldi, Iii, md;
4258   const PetscInt *Adi = Ad->i;
4259   PetscInt       *ld  = Aij->ld;
4260 
4261   PetscFunctionBegin;
4262   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4263   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4264   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4265   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4266 
4267   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4268   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4269 
4270   for (i = 0; i < m; i++) {
4271     if (PetscDefined(USE_DEBUG)) {
4272       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4273         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4274         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4275       }
4276     }
4277     nnz = Ii[i + 1] - Ii[i];
4278     Iii = Ii[i];
4279     ldi = ld[i];
4280     md  = Adi[i + 1] - Adi[i];
4281     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4282     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4283     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4284     ad += md;
4285     ao += nnz - md;
4286   }
4287   nooffprocentries      = mat->nooffprocentries;
4288   mat->nooffprocentries = PETSC_TRUE;
4289   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4290   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4291   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4292   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4293   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4294   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4295   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4296   mat->nooffprocentries = nooffprocentries;
4297   PetscFunctionReturn(PETSC_SUCCESS);
4298 }
4299 
4300 /*@
4301   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4302 
4303   Collective
4304 
4305   Input Parameters:
4306 + mat - the matrix
4307 - v   - matrix values, stored by row
4308 
4309   Level: intermediate
4310 
4311   Notes:
4312   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4313 
4314   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4315 
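  A minimal usage sketch: assuming `A` was created as in the `MatCreateMPIAIJWithArrays()` example (process 0 owns 3 nonzeros with sorted column indices) and `vnew` is a hypothetical array holding the replacement values in the same row-by-row order,
.vb
  PetscScalar vnew[] = {10.0, 20.0, 30.0}; /* hypothetical new values for the 3 local nonzeros */

  MatUpdateMPIAIJWithArray(A, vnew);       /* only the numerical values change; the nonzero pattern stays fixed */
.ve
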
4316 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4317           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4318 @*/
4319 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4320 {
4321   PetscInt        nnz, i, m;
4322   PetscBool       nooffprocentries;
4323   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4324   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4325   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4326   PetscScalar    *ad, *ao;
4327   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4328   PetscInt        ldi, Iii, md;
4329   PetscInt       *ld = Aij->ld;
4330 
4331   PetscFunctionBegin;
4332   m = mat->rmap->n;
4333 
4334   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4335   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4336   Iii = 0;
4337   for (i = 0; i < m; i++) {
4338     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4339     ldi = ld[i];
4340     md  = Adi[i + 1] - Adi[i];
4341     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4342     ad += md;
4343     if (ao) {
4344       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4345       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4346       ao += nnz - md;
4347     }
4348     Iii += nnz;
4349   }
4350   nooffprocentries      = mat->nooffprocentries;
4351   mat->nooffprocentries = PETSC_TRUE;
4352   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4353   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4354   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4355   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4356   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4357   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4358   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4359   mat->nooffprocentries = nooffprocentries;
4360   PetscFunctionReturn(PETSC_SUCCESS);
4361 }
4362 
4363 /*@
4364   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4365   (the default parallel PETSc format).  For good matrix assembly performance
4366   the user should preallocate the matrix storage by setting the parameters
4367   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4368 
4369   Collective
4370 
4371   Input Parameters:
4372 + comm  - MPI communicator
4373 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4374           This value should be the same as the local size used in creating the
4375           y vector for the matrix-vector product y = Ax.
4376 . n     - This value should be the same as the local size used in creating the
4377           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4378           calculated if N is given) For square matrices n is almost always m.
4379 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4380 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4381 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4382           (same value is used for all local rows)
4383 . d_nnz - array containing the number of nonzeros in the various rows of the
4384           DIAGONAL portion of the local submatrix (possibly different for each row)
4385           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4386           The size of this array is equal to the number of local rows, i.e., 'm'.
4387 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4388           submatrix (same value is used for all local rows).
4389 - o_nnz - array containing the number of nonzeros in the various rows of the
4390           OFF-DIAGONAL portion of the local submatrix (possibly different for
4391           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4392           structure. The size of this array is equal to the number
4393           of local rows, i.e., 'm'.
4394 
4395   Output Parameter:
4396 . A - the matrix
4397 
4398   Options Database Keys:
4399 + -mat_no_inode                     - Do not use inodes
4400 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4401 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4402                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4403                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4404 
4405   Level: intermediate
4406 
4407   Notes:
4408   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4409   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4410   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4411 
4412   If the *_nnz parameter is given then the *_nz parameter is ignored
4413 
4414   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4415   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4416   storage requirements for this matrix.
4417 
4418   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4419   processor then it must be used on all processors that share the object for
4420   that argument.
4421 
4422   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4423   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4424 
4425   The user MUST specify either the local or global matrix dimensions
4426   (possibly both).
4427 
4428   The parallel matrix is partitioned across processors such that the
4429   first `m0` rows belong to process 0, the next `m1` rows belong to
4430   process 1, the next `m2` rows belong to process 2, etc., where
4431   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4432   values corresponding to an [m x N] submatrix.
4433 
4434   The columns are logically partitioned with the n0 columns belonging
4435   to the 0th partition, the next n1 columns belonging to the next
4436   partition, etc., where n0,n1,n2... are the input parameter 'n'.
4437 
4438   The DIAGONAL portion of the local submatrix on any given processor
4439   is the submatrix corresponding to the rows and columns m,n
4440   corresponding to the given processor, i.e., the diagonal matrix on
4441   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4442   etc. The remaining portion of the local submatrix [m x (N-n)]
4443   constitutes the OFF-DIAGONAL portion. The example below better
4444   illustrates this concept.
4445 
4446   For a square global matrix we define each processor's diagonal portion
4447   to be its local rows and the corresponding columns (a square submatrix);
4448   each processor's off-diagonal portion encompasses the remainder of the
4449   local matrix (a rectangular submatrix).
4450 
4451   If `o_nnz`, `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4452 
4453   When calling this routine with a single process communicator, a matrix of
4454   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4455   type of communicator, use the construction mechanism
4456 .vb
4457   MatCreate(..., &A);
4458   MatSetType(A, MATMPIAIJ);
4459   MatSetSizes(A, m, n, M, N);
4460   MatMPIAIJSetPreallocation(A, ...);
4461 .ve
4462 
4463   By default, this format uses inodes (identical nodes) when possible.
4464   We search for consecutive rows with the same nonzero structure, thereby
4465   reusing matrix information to achieve increased efficiency.
4466 
4467   Example Usage:
4468   Consider the following 8x8 matrix with 34 non-zero values, that is
4469   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4470   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4471   as follows
4472 
4473 .vb
4474             1  2  0  |  0  3  0  |  0  4
4475     Proc0   0  5  6  |  7  0  0  |  8  0
4476             9  0 10  | 11  0  0  | 12  0
4477     -------------------------------------
4478            13  0 14  | 15 16 17  |  0  0
4479     Proc1   0 18  0  | 19 20 21  |  0  0
4480             0  0  0  | 22 23  0  | 24  0
4481     -------------------------------------
4482     Proc2  25 26 27  |  0  0 28  | 29  0
4483            30  0  0  | 31 32 33  |  0 34
4484 .ve
4485 
4486   This can be represented as a collection of submatrices as
4487 
4488 .vb
4489       A B C
4490       D E F
4491       G H I
4492 .ve
4493 
4494   Where the submatrices A,B,C are owned by proc0, D,E,F are
4495   owned by proc1, G,H,I are owned by proc2.
4496 
4497   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4498   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4499   The 'M','N' parameters are 8,8, and have the same values on all procs.
4500 
4501   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4502   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4503   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4504   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4505   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4506   matrix, and [DF] as another `MATSEQAIJ` matrix.
4507 
4508   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4509   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4510   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4511   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4512   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4513   In this case, the values of `d_nz`,`o_nz` are
4514 .vb
4515      proc0  d_nz = 2, o_nz = 2
4516      proc1  d_nz = 3, o_nz = 2
4517      proc2  d_nz = 1, o_nz = 4
4518 .ve
4519   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4520   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4521   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4522   34 values.
4523 
4524   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4525   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4526   In the above case the values for `d_nnz`, `o_nnz` are
4527 .vb
4528      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4529      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4530      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4531 .ve
4532   Here the space allocated is the sum of all the above values, i.e., 34, and
4533   hence pre-allocation is perfect.
4534 
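  A minimal sketch of the corresponding call on proc2, using the `d_nnz`/`o_nnz` values from the example above (insertion of the actual values not shown; error checking omitted), could be
.vb
  Mat      A;
  PetscInt d_nnz[] = {1, 1};   /* nonzeros per local row in the DIAGONAL block */
  PetscInt o_nnz[] = {4, 4};   /* nonzeros per local row in the OFF-DIAGONAL block */

  MatCreateAIJ(PETSC_COMM_WORLD, 2, 2, 8, 8, 0, d_nnz, 0, o_nnz, &A);
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
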
4535 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4536           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4537           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4538 @*/
4539 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4540 {
4541   PetscMPIInt size;
4542 
4543   PetscFunctionBegin;
4544   PetscCall(MatCreate(comm, A));
4545   PetscCall(MatSetSizes(*A, m, n, M, N));
4546   PetscCallMPI(MPI_Comm_size(comm, &size));
4547   if (size > 1) {
4548     PetscCall(MatSetType(*A, MATMPIAIJ));
4549     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4550   } else {
4551     PetscCall(MatSetType(*A, MATSEQAIJ));
4552     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4553   }
4554   PetscFunctionReturn(PETSC_SUCCESS);
4555 }
4556 
4557 /*MC
4558     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4559 
4560     Synopsis:
4561     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4562 
4563     Not Collective
4564 
4565     Input Parameter:
4566 .   A - the `MATMPIAIJ` matrix
4567 
4568     Output Parameters:
4569 +   Ad - the diagonal portion of the matrix
4570 .   Ao - the off-diagonal portion of the matrix
4571 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4572 -   ierr - error code
4573 
4574      Level: advanced
4575 
4576     Note:
4577     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4578 
4579 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4580 M*/
4581 
4582 /*MC
4583     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4584 
4585     Synopsis:
4586     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4587 
4588     Not Collective
4589 
4590     Input Parameters:
4591 +   A - the `MATMPIAIJ` matrix
4592 .   Ad - the diagonal portion of the matrix
4593 .   Ao - the off-diagonal portion of the matrix
4594 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4595 -   ierr - error code
4596 
4597      Level: advanced
4598 
4599 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4600 M*/
4601 
4602 /*@C
4603   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4604 
4605   Not Collective
4606 
4607   Input Parameter:
4608 . A - The `MATMPIAIJ` matrix
4609 
4610   Output Parameters:
4611 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4612 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4613 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4614 
4615   Level: intermediate
4616 
4617   Note:
4618   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4619   in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
4620   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4621   local column numbers to global column numbers in the original matrix.
4622 
4623   Fortran Notes:
4624   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4625 
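  A minimal access sketch (`A` is assumed to be an assembled `MATMPIAIJ` matrix):
.vb
  Mat             Ad, Ao;
  const PetscInt *colmap;

  MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
  /* Ad and Ao are MATSEQAIJ; colmap[k] is the global column of local column k of Ao */
.ve
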
4626 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4627 @*/
4628 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4629 {
4630   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4631   PetscBool   flg;
4632 
4633   PetscFunctionBegin;
4634   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4635   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4636   if (Ad) *Ad = a->A;
4637   if (Ao) *Ao = a->B;
4638   if (colmap) *colmap = a->garray;
4639   PetscFunctionReturn(PETSC_SUCCESS);
4640 }
4641 
4642 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4643 {
4644   PetscInt     m, N, i, rstart, nnz, Ii;
4645   PetscInt    *indx;
4646   PetscScalar *values;
4647   MatType      rootType;
4648 
4649   PetscFunctionBegin;
4650   PetscCall(MatGetSize(inmat, &m, &N));
4651   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4652     PetscInt *dnz, *onz, sum, bs, cbs;
4653 
4654     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4655     /* Check sum(n) = N */
4656     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4657     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4658 
4659     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4660     rstart -= m;
4661 
4662     MatPreallocateBegin(comm, m, n, dnz, onz);
4663     for (i = 0; i < m; i++) {
4664       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4665       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4666       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4667     }
4668 
4669     PetscCall(MatCreate(comm, outmat));
4670     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4671     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4672     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4673     PetscCall(MatGetRootType_Private(inmat, &rootType));
4674     PetscCall(MatSetType(*outmat, rootType));
4675     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4676     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4677     MatPreallocateEnd(dnz, onz);
4678     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4679   }
4680 
4681   /* numeric phase */
4682   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4683   for (i = 0; i < m; i++) {
4684     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4685     Ii = i + rstart;
4686     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4687     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4688   }
4689   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4690   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4691   PetscFunctionReturn(PETSC_SUCCESS);
4692 }
4693 
4694 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4695 {
4696   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4697 
4698   PetscFunctionBegin;
4699   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4700   PetscCall(PetscFree(merge->id_r));
4701   PetscCall(PetscFree(merge->len_s));
4702   PetscCall(PetscFree(merge->len_r));
4703   PetscCall(PetscFree(merge->bi));
4704   PetscCall(PetscFree(merge->bj));
4705   PetscCall(PetscFree(merge->buf_ri[0]));
4706   PetscCall(PetscFree(merge->buf_ri));
4707   PetscCall(PetscFree(merge->buf_rj[0]));
4708   PetscCall(PetscFree(merge->buf_rj));
4709   PetscCall(PetscFree(merge->coi));
4710   PetscCall(PetscFree(merge->coj));
4711   PetscCall(PetscFree(merge->owners_co));
4712   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4713   PetscCall(PetscFree(merge));
4714   PetscFunctionReturn(PETSC_SUCCESS);
4715 }
4716 
4717 #include <../src/mat/utils/freespace.h>
4718 #include <petscbt.h>
4719 
4720 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4721 {
4722   MPI_Comm             comm;
4723   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4724   PetscMPIInt          size, rank, taga, *len_s;
4725   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4726   PetscMPIInt          proc, k;
4727   PetscInt           **buf_ri, **buf_rj;
4728   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4729   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4730   MPI_Request         *s_waits, *r_waits;
4731   MPI_Status          *status;
4732   const MatScalar     *aa, *a_a;
4733   MatScalar          **abuf_r, *ba_i;
4734   Mat_Merge_SeqsToMPI *merge;
4735   PetscContainer       container;
4736 
4737   PetscFunctionBegin;
4738   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4739   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4740 
4741   PetscCallMPI(MPI_Comm_size(comm, &size));
4742   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4743 
4744   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4745   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4746   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4747   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4748   aa = a_a;
4749 
4750   bi     = merge->bi;
4751   bj     = merge->bj;
4752   buf_ri = merge->buf_ri;
4753   buf_rj = merge->buf_rj;
4754 
4755   PetscCall(PetscMalloc1(size, &status));
4756   owners = merge->rowmap->range;
4757   len_s  = merge->len_s;
4758 
4759   /* send and recv matrix values */
4760   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4761   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4762 
4763   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4764   for (proc = 0, k = 0; proc < size; proc++) {
4765     if (!len_s[proc]) continue;
4766     i = owners[proc];
4767     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4768     k++;
4769   }
4770 
4771   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4772   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4773   PetscCall(PetscFree(status));
4774 
4775   PetscCall(PetscFree(s_waits));
4776   PetscCall(PetscFree(r_waits));
4777 
4778   /* insert mat values of mpimat */
4779   PetscCall(PetscMalloc1(N, &ba_i));
4780   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4781 
4782   for (k = 0; k < merge->nrecv; k++) {
4783     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4784     nrows       = *buf_ri_k[k];
4785     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4786     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4787   }
4788 
4789   /* set values of ba */
4790   m = merge->rowmap->n;
4791   for (i = 0; i < m; i++) {
4792     arow = owners[rank] + i;
4793     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4794     bnzi = bi[i + 1] - bi[i];
4795     PetscCall(PetscArrayzero(ba_i, bnzi));
4796 
4797     /* add local non-zero vals of this proc's seqmat into ba */
4798     anzi   = ai[arow + 1] - ai[arow];
4799     aj     = a->j + ai[arow];
4800     aa     = a_a + ai[arow];
4801     nextaj = 0;
4802     for (j = 0; nextaj < anzi; j++) {
4803       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4804         ba_i[j] += aa[nextaj++];
4805       }
4806     }
4807 
4808     /* add received vals into ba */
4809     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4810       /* i-th row */
4811       if (i == *nextrow[k]) {
4812         anzi   = *(nextai[k] + 1) - *nextai[k];
4813         aj     = buf_rj[k] + *nextai[k];
4814         aa     = abuf_r[k] + *nextai[k];
4815         nextaj = 0;
4816         for (j = 0; nextaj < anzi; j++) {
4817           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4818             ba_i[j] += aa[nextaj++];
4819           }
4820         }
4821         nextrow[k]++;
4822         nextai[k]++;
4823       }
4824     }
4825     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4826   }
4827   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4828   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4829   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4830 
4831   PetscCall(PetscFree(abuf_r[0]));
4832   PetscCall(PetscFree(abuf_r));
4833   PetscCall(PetscFree(ba_i));
4834   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4835   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4836   PetscFunctionReturn(PETSC_SUCCESS);
4837 }
4838 
4839 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4840 {
4841   Mat                  B_mpi;
4842   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4843   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4844   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4845   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4846   PetscInt             len, *dnz, *onz, bs, cbs;
4847   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4848   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4849   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4850   MPI_Status          *status;
4851   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4852   PetscBT              lnkbt;
4853   Mat_Merge_SeqsToMPI *merge;
4854   PetscContainer       container;
4855 
4856   PetscFunctionBegin;
4857   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4858 
4859   /* make sure it is a PETSc comm */
4860   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4861   PetscCallMPI(MPI_Comm_size(comm, &size));
4862   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4863 
4864   PetscCall(PetscNew(&merge));
4865   PetscCall(PetscMalloc1(size, &status));
4866 
4867   /* determine row ownership */
4868   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4869   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4870   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4871   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4872   PetscCall(PetscLayoutSetUp(merge->rowmap));
4873   PetscCall(PetscMalloc1(size, &len_si));
4874   PetscCall(PetscMalloc1(size, &merge->len_s));
4875 
4876   m      = merge->rowmap->n;
4877   owners = merge->rowmap->range;
4878 
4879   /* determine the number of messages to send, their lengths */
4880   len_s = merge->len_s;
4881 
4882   len          = 0; /* length of buf_si[] */
4883   merge->nsend = 0;
4884   for (PetscMPIInt proc = 0; proc < size; proc++) {
4885     len_si[proc] = 0;
4886     if (proc == rank) {
4887       len_s[proc] = 0;
4888     } else {
4889       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4890       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4891     }
4892     if (len_s[proc]) {
4893       merge->nsend++;
4894       nrows = 0;
4895       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4896         if (ai[i + 1] > ai[i]) nrows++;
4897       }
4898       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4899       len += len_si[proc];
4900     }
4901   }
4902 
4903   /* determine the number and length of messages to receive for ij-structure */
4904   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4905   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4906 
4907   /* post the Irecv of j-structure */
4908   PetscCall(PetscCommGetNewTag(comm, &tagj));
4909   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4910 
4911   /* post the Isend of j-structure */
4912   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4913 
4914   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4915     if (!len_s[proc]) continue;
4916     i = owners[proc];
4917     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4918     k++;
4919   }
4920 
4921   /* receives and sends of j-structure are complete */
4922   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4923   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4924 
4925   /* send and recv i-structure */
4926   PetscCall(PetscCommGetNewTag(comm, &tagi));
4927   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4928 
4929   PetscCall(PetscMalloc1(len + 1, &buf_s));
4930   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4931   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4932     if (!len_s[proc]) continue;
4933     /* form outgoing message for i-structure:
4934          buf_si[0]:                 nrows to be sent
4935                [1:nrows]:           row index (global)
4936                [nrows+1:2*nrows+1]: i-structure index
4937     */
4938     nrows       = len_si[proc] / 2 - 1;
4939     buf_si_i    = buf_si + nrows + 1;
4940     buf_si[0]   = nrows;
4941     buf_si_i[0] = 0;
4942     nrows       = 0;
4943     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4944       anzi = ai[i + 1] - ai[i];
4945       if (anzi) {
4946         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4947         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4948         nrows++;
4949       }
4950     }
4951     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4952     k++;
4953     buf_si += len_si[proc];
4954   }
4955 
4956   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4957   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4958 
4959   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4960   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4961 
4962   PetscCall(PetscFree(len_si));
4963   PetscCall(PetscFree(len_ri));
4964   PetscCall(PetscFree(rj_waits));
4965   PetscCall(PetscFree2(si_waits, sj_waits));
4966   PetscCall(PetscFree(ri_waits));
4967   PetscCall(PetscFree(buf_s));
4968   PetscCall(PetscFree(status));
4969 
4970   /* compute a local seq matrix in each processor */
4971   /* allocate bi array and free space for accumulating nonzero column info */
4972   PetscCall(PetscMalloc1(m + 1, &bi));
4973   bi[0] = 0;
4974 
4975   /* create and initialize a linked list */
4976   nlnk = N + 1;
4977   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4978 
4979   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4980   len = ai[owners[rank + 1]] - ai[owners[rank]];
4981   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4982 
4983   current_space = free_space;
4984 
4985   /* determine symbolic info for each local row */
4986   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4987 
4988   for (k = 0; k < merge->nrecv; k++) {
4989     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4990     nrows       = *buf_ri_k[k];
4991     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4992     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4993   }
4994 
4995   MatPreallocateBegin(comm, m, n, dnz, onz);
4996   len = 0;
4997   for (i = 0; i < m; i++) {
4998     bnzi = 0;
4999     /* add local non-zero cols of this proc's seqmat into lnk */
5000     arow = owners[rank] + i;
5001     anzi = ai[arow + 1] - ai[arow];
5002     aj   = a->j + ai[arow];
5003     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5004     bnzi += nlnk;
5005     /* add received col data into lnk */
5006     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5007       if (i == *nextrow[k]) {            /* i-th row */
5008         anzi = *(nextai[k] + 1) - *nextai[k];
5009         aj   = buf_rj[k] + *nextai[k];
5010         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5011         bnzi += nlnk;
5012         nextrow[k]++;
5013         nextai[k]++;
5014       }
5015     }
5016     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5017 
5018     /* if free space is not available, make more free space */
5019     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5020     /* copy data into free space, then initialize lnk */
5021     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5022     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5023 
5024     current_space->array += bnzi;
5025     current_space->local_used += bnzi;
5026     current_space->local_remaining -= bnzi;
5027 
5028     bi[i + 1] = bi[i] + bnzi;
5029   }
5030 
5031   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5032 
5033   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5034   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5035   PetscCall(PetscLLDestroy(lnk, lnkbt));
5036 
5037   /* create symbolic parallel matrix B_mpi */
5038   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5039   PetscCall(MatCreate(comm, &B_mpi));
5040   if (n == PETSC_DECIDE) {
5041     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5042   } else {
5043     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5044   }
5045   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5046   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5047   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5048   MatPreallocateEnd(dnz, onz);
5049   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5050 
5051   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5052   B_mpi->assembled = PETSC_FALSE;
5053   merge->bi        = bi;
5054   merge->bj        = bj;
5055   merge->buf_ri    = buf_ri;
5056   merge->buf_rj    = buf_rj;
5057   merge->coi       = NULL;
5058   merge->coj       = NULL;
5059   merge->owners_co = NULL;
5060 
5061   PetscCall(PetscCommDestroy(&comm));
5062 
5063   /* attach the supporting struct to B_mpi for reuse */
5064   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5065   PetscCall(PetscContainerSetPointer(container, merge));
5066   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5067   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5068   PetscCall(PetscContainerDestroy(&container));
5069   *mpimat = B_mpi;
5070 
5071   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5072   PetscFunctionReturn(PETSC_SUCCESS);
5073 }
5074 
5075 /*@
5076   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5077   matrices from each processor
5078 
5079   Collective
5080 
5081   Input Parameters:
5082 + comm   - the communicator the parallel matrix will live on
5083 . seqmat - the input sequential matrix on each MPI process
5084 . m      - number of local rows (or `PETSC_DECIDE`)
5085 . n      - number of local columns (or `PETSC_DECIDE`)
5086 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5087 
5088   Output Parameter:
5089 . mpimat - the parallel matrix generated
5090 
5091   Level: advanced
5092 
5093   Note:
5094   The dimensions of the sequential matrix on each MPI process MUST be the same.
5095   The input `seqmat` is included in the container "Mat_Merge_SeqsToMPI" and will be
5096   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5097 
5098 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5099 @*/
5100 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5101 {
5102   PetscMPIInt size;
5103 
5104   PetscFunctionBegin;
5105   PetscCallMPI(MPI_Comm_size(comm, &size));
5106   if (size == 1) {
5107     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5108     if (scall == MAT_INITIAL_MATRIX) {
5109       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5110     } else {
5111       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5112     }
5113     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5114     PetscFunctionReturn(PETSC_SUCCESS);
5115   }
5116   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5117   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5118   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5119   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5120   PetscFunctionReturn(PETSC_SUCCESS);
5121 }
5122 
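/* A minimal usage sketch (illustrative, not part of the library): every rank assembles a
   SeqAIJ matrix of the same global size and the pieces are summed into one parallel MPIAIJ
   matrix. The matrix names below are assumptions of this sketch.

     Mat Aseq, Ampi;
     // ... assemble Aseq with identical dimensions on each rank ...
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Ampi));
     // when only the numerical values of Aseq change, reuse the parallel matrix
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &Ampi));
     PetscCall(MatDestroy(&Ampi));
*/
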
5123 /*@
5124   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5125 
5126   Not Collective
5127 
5128   Input Parameter:
5129 . A - the matrix
5130 
5131   Output Parameter:
5132 . A_loc - the local sequential matrix generated
5133 
5134   Level: developer
5135 
5136   Notes:
5137   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5138   with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with
5139   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5140
5141   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5142
5143   For parallel matrices, this creates an entirely new matrix. If the matrix is sequential, it merely increases the reference count.
5144
5145   Destroy the matrix with `MatDestroy()`.
5146 
5147 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5148 @*/
5149 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5150 {
5151   PetscBool mpi;
5152 
5153   PetscFunctionBegin;
5154   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5155   if (mpi) {
5156     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5157   } else {
5158     *A_loc = A;
5159     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5160   }
5161   PetscFunctionReturn(PETSC_SUCCESS);
5162 }
5163 
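/* A brief usage sketch (illustrative): obtain the process-local rows of a (possibly parallel)
   AIJ matrix as a sequential matrix; the matrix name A is an assumption of this sketch.

     Mat A_loc;
     PetscCall(MatAIJGetLocalMat(A, &A_loc));
     // ... work with A_loc ...
     PetscCall(MatDestroy(&A_loc));
*/
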
5164 /*@
5165   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5166 
5167   Not Collective
5168 
5169   Input Parameters:
5170 + A     - the matrix
5171 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5172 
5173   Output Parameter:
5174 . A_loc - the local sequential matrix generated
5175 
5176   Level: developer
5177 
5178   Notes:
5179   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5180   matrix with `mlocal` rows and `n` columns. `mlocal` is the local row count obtained with
5181   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5182
5183   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5184 
5185   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5186   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5187   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5188   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5189 
5190 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5191 @*/
5192 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5193 {
5194   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5195   Mat_SeqAIJ        *mat, *a, *b;
5196   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5197   const PetscScalar *aa, *ba, *aav, *bav;
5198   PetscScalar       *ca, *cam;
5199   PetscMPIInt        size;
5200   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5201   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5202   PetscBool          match;
5203 
5204   PetscFunctionBegin;
5205   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5206   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5207   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5208   if (size == 1) {
5209     if (scall == MAT_INITIAL_MATRIX) {
5210       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5211       *A_loc = mpimat->A;
5212     } else if (scall == MAT_REUSE_MATRIX) {
5213       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5214     }
5215     PetscFunctionReturn(PETSC_SUCCESS);
5216   }
5217 
5218   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5219   a  = (Mat_SeqAIJ *)mpimat->A->data;
5220   b  = (Mat_SeqAIJ *)mpimat->B->data;
5221   ai = a->i;
5222   aj = a->j;
5223   bi = b->i;
5224   bj = b->j;
5225   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5226   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5227   aa = aav;
5228   ba = bav;
5229   if (scall == MAT_INITIAL_MATRIX) {
5230     PetscCall(PetscMalloc1(1 + am, &ci));
5231     ci[0] = 0;
5232     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5233     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5234     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5235     k = 0;
5236     for (i = 0; i < am; i++) {
5237       ncols_o = bi[i + 1] - bi[i];
5238       ncols_d = ai[i + 1] - ai[i];
5239       /* off-diagonal portion of A: columns before the diagonal block */
5240       for (jo = 0; jo < ncols_o; jo++) {
5241         col = cmap[*bj];
5242         if (col >= cstart) break;
5243         cj[k] = col;
5244         bj++;
5245         ca[k++] = *ba++;
5246       }
5247       /* diagonal portion of A */
5248       for (j = 0; j < ncols_d; j++) {
5249         cj[k]   = cstart + *aj++;
5250         ca[k++] = *aa++;
5251       }
5252       /* off-diagonal portion of A: columns after the diagonal block */
5253       for (j = jo; j < ncols_o; j++) {
5254         cj[k]   = cmap[*bj++];
5255         ca[k++] = *ba++;
5256       }
5257     }
5258     /* put together the new matrix */
5259     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5260     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5261     /* Since these are PETSc arrays, change flags to free them as necessary. */
5262     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5263     mat->free_a  = PETSC_TRUE;
5264     mat->free_ij = PETSC_TRUE;
5265     mat->nonew   = 0;
5266   } else if (scall == MAT_REUSE_MATRIX) {
5267     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5268     ci  = mat->i;
5269     cj  = mat->j;
5270     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5271     for (i = 0; i < am; i++) {
5272       /* off-diagonal portion of A: columns before the diagonal block */
5273       ncols_o = bi[i + 1] - bi[i];
5274       for (jo = 0; jo < ncols_o; jo++) {
5275         col = cmap[*bj];
5276         if (col >= cstart) break;
5277         *cam++ = *ba++;
5278         bj++;
5279       }
5280       /* diagonal portion of A */
5281       ncols_d = ai[i + 1] - ai[i];
5282       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5283       /* off-diagonal portion of A: columns after the diagonal block */
5284       for (j = jo; j < ncols_o; j++) {
5285         *cam++ = *ba++;
5286         bj++;
5287       }
5288     }
5289     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5290   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5291   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5292   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5293   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5294   PetscFunctionReturn(PETSC_SUCCESS);
5295 }
5296 
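/* A usage sketch (illustrative): the first call builds the local matrix, later calls with
   MAT_REUSE_MATRIX only refresh its numerical values. The matrix name A (a MATMPIAIJ) is an
   assumption of this sketch.

     Mat A_loc;
     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
     // ... values of A change (same nonzero pattern) ...
     PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));
     PetscCall(MatDestroy(&A_loc));
*/
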
5297 /*@
5298   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5299   `mlocal` rows and `n` columns, where `n` is the sum of the number of columns of the diagonal and off-diagonal parts
5300 
5301   Not Collective
5302 
5303   Input Parameters:
5304 + A     - the matrix
5305 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5306 
5307   Output Parameters:
5308 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5309 - A_loc - the local sequential matrix generated
5310 
5311   Level: developer
5312 
5313   Note:
5314   This is different from `MatMPIAIJGetLocalMat()` since the first columns of the returned matrix are those associated with the diagonal
5315   part, followed by those associated with the off-diagonal part (in its local ordering)
5316 
5317 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5318 @*/
5319 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5320 {
5321   Mat             Ao, Ad;
5322   const PetscInt *cmap;
5323   PetscMPIInt     size;
5324   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5325 
5326   PetscFunctionBegin;
5327   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5328   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5329   if (size == 1) {
5330     if (scall == MAT_INITIAL_MATRIX) {
5331       PetscCall(PetscObjectReference((PetscObject)Ad));
5332       *A_loc = Ad;
5333     } else if (scall == MAT_REUSE_MATRIX) {
5334       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5335     }
5336     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5337     PetscFunctionReturn(PETSC_SUCCESS);
5338   }
5339   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5340   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5341   if (f) {
5342     PetscCall((*f)(A, scall, glob, A_loc));
5343   } else {
5344     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5345     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5346     Mat_SeqAIJ        *c;
5347     PetscInt          *ai = a->i, *aj = a->j;
5348     PetscInt          *bi = b->i, *bj = b->j;
5349     PetscInt          *ci, *cj;
5350     const PetscScalar *aa, *ba;
5351     PetscScalar       *ca;
5352     PetscInt           i, j, am, dn, on;
5353 
5354     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5355     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5356     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5357     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5358     if (scall == MAT_INITIAL_MATRIX) {
5359       PetscInt k;
5360       PetscCall(PetscMalloc1(1 + am, &ci));
5361       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5362       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5363       ci[0] = 0;
5364       for (i = 0, k = 0; i < am; i++) {
5365         const PetscInt ncols_o = bi[i + 1] - bi[i];
5366         const PetscInt ncols_d = ai[i + 1] - ai[i];
5367         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5368         /* diagonal portion of A */
5369         for (j = 0; j < ncols_d; j++, k++) {
5370           cj[k] = *aj++;
5371           ca[k] = *aa++;
5372         }
5373         /* off-diagonal portion of A */
5374         for (j = 0; j < ncols_o; j++, k++) {
5375           cj[k] = dn + *bj++;
5376           ca[k] = *ba++;
5377         }
5378       }
5379       /* put together the new matrix */
5380       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5381       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5382       /* Since these are PETSc arrays, change flags to free them as necessary. */
5383       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5384       c->free_a  = PETSC_TRUE;
5385       c->free_ij = PETSC_TRUE;
5386       c->nonew   = 0;
5387       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5388     } else if (scall == MAT_REUSE_MATRIX) {
5389       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5390       for (i = 0; i < am; i++) {
5391         const PetscInt ncols_d = ai[i + 1] - ai[i];
5392         const PetscInt ncols_o = bi[i + 1] - bi[i];
5393         /* diagonal portion of A */
5394         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5395         /* off-diagonal portion of A */
5396         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5397       }
5398       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5399     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5400     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5401     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5402     if (glob) {
5403       PetscInt cst, *gidx;
5404 
5405       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5406       PetscCall(PetscMalloc1(dn + on, &gidx));
5407       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5408       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5409       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5410     }
5411   }
5412   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5413   PetscFunctionReturn(PETSC_SUCCESS);
5414 }
5415 
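/* A usage sketch (illustrative): the returned IS maps the local column numbering of the merged
   matrix (diagonal columns first, then off-diagonal columns) back to global column indices.
   The matrix name A is an assumption of this sketch.

     Mat A_loc;
     IS  glob;
     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc));
     // entry i of glob is the global column index of local column i of A_loc
     PetscCall(ISDestroy(&glob));
     PetscCall(MatDestroy(&A_loc));
*/
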
5416 /*@C
5417   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5418 
5419   Not Collective
5420 
5421   Input Parameters:
5422 + A     - the matrix
5423 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5424 . row   - index set of rows to extract (or `NULL`)
5425 - col   - index set of columns to extract (or `NULL`)
5426 
5427   Output Parameter:
5428 . A_loc - the local sequential matrix generated
5429 
5430   Level: developer
5431 
5432 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5433 @*/
5434 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5435 {
5436   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5437   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5438   IS          isrowa, iscola;
5439   Mat        *aloc;
5440   PetscBool   match;
5441 
5442   PetscFunctionBegin;
5443   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5444   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5445   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5446   if (!row) {
5447     start = A->rmap->rstart;
5448     end   = A->rmap->rend;
5449     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5450   } else {
5451     isrowa = *row;
5452   }
5453   if (!col) {
5454     start = A->cmap->rstart;
5455     cmap  = a->garray;
5456     nzA   = a->A->cmap->n;
5457     nzB   = a->B->cmap->n;
5458     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5459     ncols = 0;
5460     for (i = 0; i < nzB; i++) {
5461       if (cmap[i] < start) idx[ncols++] = cmap[i];
5462       else break;
5463     }
5464     imark = i;
5465     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5466     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5467     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5468   } else {
5469     iscola = *col;
5470   }
5471   if (scall != MAT_INITIAL_MATRIX) {
5472     PetscCall(PetscMalloc1(1, &aloc));
5473     aloc[0] = *A_loc;
5474   }
5475   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5476   if (!col) { /* attach global id of condensed columns */
5477     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5478   }
5479   *A_loc = aloc[0];
5480   PetscCall(PetscFree(aloc));
5481   if (!row) PetscCall(ISDestroy(&isrowa));
5482   if (!col) PetscCall(ISDestroy(&iscola));
5483   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5484   PetscFunctionReturn(PETSC_SUCCESS);
5485 }
5486 
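/* A usage sketch (illustrative): extract the local rows of A restricted to its nonzero columns;
   passing NULL for row and col lets the routine build the default index sets. The matrix name A
   is an assumption of this sketch.

     Mat A_loc;
     PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc));
     // ... use A_loc ...
     PetscCall(MatDestroy(&A_loc));
*/
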
5487 /*
5488  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once the row index is matched.
5489  * Rows can be local or remote. The routine is designed to be scalable in memory, so nothing is allocated
5490  * based on a global size.
5491  * */
5492 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5493 {
5494   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5495   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5496   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5497   PetscMPIInt            owner;
5498   PetscSFNode           *iremote, *oiremote;
5499   const PetscInt        *lrowindices;
5500   PetscSF                sf, osf;
5501   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5502   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5503   MPI_Comm               comm;
5504   ISLocalToGlobalMapping mapping;
5505   const PetscScalar     *pd_a, *po_a;
5506 
5507   PetscFunctionBegin;
5508   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5509   /* plocalsize is the number of roots
5510    * nrows is the number of leaves
5511    * */
5512   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5513   PetscCall(ISGetLocalSize(rows, &nrows));
5514   PetscCall(PetscCalloc1(nrows, &iremote));
5515   PetscCall(ISGetIndices(rows, &lrowindices));
5516   for (i = 0; i < nrows; i++) {
5517     /* Find a remote index and an owner for a row
5518      * The row could be local or remote
5519      * */
5520     owner = 0;
5521     lidx  = 0;
5522     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5523     iremote[i].index = lidx;
5524     iremote[i].rank  = owner;
5525   }
5526   /* Create SF to communicate how many nonzero columns for each row */
5527   PetscCall(PetscSFCreate(comm, &sf));
5528   /* SF will figure out the number of nonzero columns for each row, and their
5529    * offsets
5530    * */
5531   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5532   PetscCall(PetscSFSetFromOptions(sf));
5533   PetscCall(PetscSFSetUp(sf));
5534 
5535   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5536   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5537   PetscCall(PetscCalloc1(nrows, &pnnz));
5538   roffsets[0] = 0;
5539   roffsets[1] = 0;
5540   for (i = 0; i < plocalsize; i++) {
5541     /* diagonal */
5542     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5543     /* off-diagonal */
5544     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5545     /* compute offsets so that we know the relative location of each row */
5546     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5547     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5548   }
5549   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5550   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5551   /* 'r' means root, and 'l' means leaf */
5552   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5553   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5554   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5555   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5556   PetscCall(PetscSFDestroy(&sf));
5557   PetscCall(PetscFree(roffsets));
5558   PetscCall(PetscFree(nrcols));
5559   dntotalcols = 0;
5560   ontotalcols = 0;
5561   ncol        = 0;
5562   for (i = 0; i < nrows; i++) {
5563     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5564     ncol    = PetscMax(pnnz[i], ncol);
5565     /* diagonal */
5566     dntotalcols += nlcols[i * 2 + 0];
5567     /* off-diagonal */
5568     ontotalcols += nlcols[i * 2 + 1];
5569   }
5570   /* We do not need to figure out the exact number of columns
5571    * since all the calculations will be done by going through the raw data
5572    * */
5573   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5574   PetscCall(MatSetUp(*P_oth));
5575   PetscCall(PetscFree(pnnz));
5576   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5577   /* diagonal */
5578   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5579   /* off-diagonal */
5580   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5581   /* diagonal */
5582   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5583   /* off-diagonal */
5584   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5585   dntotalcols = 0;
5586   ontotalcols = 0;
5587   ntotalcols  = 0;
5588   for (i = 0; i < nrows; i++) {
5589     owner = 0;
5590     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5591     /* Set iremote for diag matrix */
5592     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5593       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5594       iremote[dntotalcols].rank  = owner;
5595       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5596       ilocal[dntotalcols++] = ntotalcols++;
5597     }
5598     /* off-diagonal */
5599     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5600       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5601       oiremote[ontotalcols].rank  = owner;
5602       oilocal[ontotalcols++]      = ntotalcols++;
5603     }
5604   }
5605   PetscCall(ISRestoreIndices(rows, &lrowindices));
5606   PetscCall(PetscFree(loffsets));
5607   PetscCall(PetscFree(nlcols));
5608   PetscCall(PetscSFCreate(comm, &sf));
5609   /* P serves as the roots and P_oth as the leaves
5610    * Diag matrix
5611    * */
5612   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5613   PetscCall(PetscSFSetFromOptions(sf));
5614   PetscCall(PetscSFSetUp(sf));
5615 
5616   PetscCall(PetscSFCreate(comm, &osf));
5617   /* off-diagonal */
5618   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5619   PetscCall(PetscSFSetFromOptions(osf));
5620   PetscCall(PetscSFSetUp(osf));
5621   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5622   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5623   /* operate on the matrix internal data to save memory */
5624   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5625   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5626   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5627   /* Convert to global indices for diag matrix */
5628   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5629   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5630   /* We want P_oth to store global indices */
5631   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5632   /* Use memory scalable approach */
5633   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5634   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5635   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5636   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5637   /* Convert back to local indices */
5638   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5639   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5640   nout = 0;
5641   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5642   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5643   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5644   /* Exchange values */
5645   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5646   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5647   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5648   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5649   /* Stop PETSc from shrinking memory */
5650   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5651   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5652   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5653   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5654   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5655   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5656   PetscCall(PetscSFDestroy(&sf));
5657   PetscCall(PetscSFDestroy(&osf));
5658   PetscFunctionReturn(PETSC_SUCCESS);
5659 }
5660 
5661 /*
5662  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5663  * This supports MPIAIJ and MAIJ
5664  * */
5665 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5666 {
5667   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5668   Mat_SeqAIJ *p_oth;
5669   IS          rows, map;
5670   PetscHMapI  hamp;
5671   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5672   MPI_Comm    comm;
5673   PetscSF     sf, osf;
5674   PetscBool   has;
5675 
5676   PetscFunctionBegin;
5677   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5678   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5679   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5680    *  and then create a submatrix (that often is an overlapping matrix)
5681    * */
5682   if (reuse == MAT_INITIAL_MATRIX) {
5683     /* Use a hash table to figure out unique keys */
5684     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5685     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5686     count = 0;
5687     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5688     for (i = 0; i < a->B->cmap->n; i++) {
5689       key = a->garray[i] / dof;
5690       PetscCall(PetscHMapIHas(hamp, key, &has));
5691       if (!has) {
5692         mapping[i] = count;
5693         PetscCall(PetscHMapISet(hamp, key, count++));
5694       } else {
5695         /* Current 'i' maps to the same key as the previous step */
5696         mapping[i] = count - 1;
5697       }
5698     }
5699     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5700     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5701     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5702     PetscCall(PetscCalloc1(htsize, &rowindices));
5703     off = 0;
5704     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5705     PetscCall(PetscHMapIDestroy(&hamp));
5706     PetscCall(PetscSortInt(htsize, rowindices));
5707     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5708     /* In case the matrix was already created but the user wants to recreate it */
5709     PetscCall(MatDestroy(P_oth));
5710     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5711     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5712     PetscCall(ISDestroy(&map));
5713     PetscCall(ISDestroy(&rows));
5714   } else if (reuse == MAT_REUSE_MATRIX) {
5715     /* If the matrix was already created, we simply update values using the SF objects
5716      * that were attached to the matrix earlier.
5717      */
5718     const PetscScalar *pd_a, *po_a;
5719 
5720     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5721     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5722     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5723     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5724     /* Update values in place */
5725     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5726     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5727     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5728     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5729     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5730     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5731     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5732     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5733   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5734   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5735   PetscFunctionReturn(PETSC_SUCCESS);
5736 }
5737 
5738 /*@C
5739   MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that equal the nonzero columns of local `A`
5740 
5741   Collective
5742 
5743   Input Parameters:
5744 + A     - the first matrix in `MATMPIAIJ` format
5745 . B     - the second matrix in `MATMPIAIJ` format
5746 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5747 
5748   Output Parameters:
5749 + rowb  - On input, index set of rows of `B` to extract (or `NULL`); modified on output
5750 . colb  - On input, index set of columns of `B` to extract (or `NULL`); modified on output
5751 - B_seq - the sequential matrix generated
5752 
5753   Level: developer
5754 
5755 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5756 @*/
5757 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5758 {
5759   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5760   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5761   IS          isrowb, iscolb;
5762   Mat        *bseq = NULL;
5763 
5764   PetscFunctionBegin;
5765   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5766              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5767   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5768 
5769   if (scall == MAT_INITIAL_MATRIX) {
5770     start = A->cmap->rstart;
5771     cmap  = a->garray;
5772     nzA   = a->A->cmap->n;
5773     nzB   = a->B->cmap->n;
5774     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5775     ncols = 0;
5776     for (i = 0; i < nzB; i++) { /* row < local row index */
5777       if (cmap[i] < start) idx[ncols++] = cmap[i];
5778       else break;
5779     }
5780     imark = i;
5781     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5782     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5783     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5784     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5785   } else {
5786     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5787     isrowb = *rowb;
5788     iscolb = *colb;
5789     PetscCall(PetscMalloc1(1, &bseq));
5790     bseq[0] = *B_seq;
5791   }
5792   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5793   *B_seq = bseq[0];
5794   PetscCall(PetscFree(bseq));
5795   if (!rowb) {
5796     PetscCall(ISDestroy(&isrowb));
5797   } else {
5798     *rowb = isrowb;
5799   }
5800   if (!colb) {
5801     PetscCall(ISDestroy(&iscolb));
5802   } else {
5803     *colb = iscolb;
5804   }
5805   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5806   PetscFunctionReturn(PETSC_SUCCESS);
5807 }
5808 
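/* A usage sketch (illustrative): on the first call pass addresses of NULL index sets and keep
   the ones the routine creates, so that a later call with MAT_REUSE_MATRIX can refresh B_seq in
   place. The matrix names A and B are assumptions of this sketch.

     IS  rowb = NULL, colb = NULL;
     Mat B_seq;
     PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq));
     // ... values of B change (same nonzero pattern) ...
     PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq));
     PetscCall(ISDestroy(&rowb));
     PetscCall(ISDestroy(&colb));
     PetscCall(MatDestroy(&B_seq));
*/
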
5809 /*
5810     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that equal the nonzero columns
5811     of the OFF-DIAGONAL portion of local A
5812 
5813     Collective
5814 
5815    Input Parameters:
5816 +    A,B - the matrices in `MATMPIAIJ` format
5817 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5818 
5819    Output Parameters:
5820 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5821 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5822 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5823 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5824 
5825     Developer Note:
5826     This directly accesses information inside the `VecScatter` associated with the matrix-vector product
5827      for this matrix. This is not desirable.
5828 
5829     Level: developer
5830 
5831 */
5832 
5833 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5834 {
5835   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5836   VecScatter         ctx;
5837   MPI_Comm           comm;
5838   const PetscMPIInt *rprocs, *sprocs;
5839   PetscMPIInt        nrecvs, nsends;
5840   const PetscInt    *srow, *rstarts, *sstarts;
5841   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5842   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5843   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5844   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5845   PetscMPIInt        size, tag, rank, nreqs;
5846 
5847   PetscFunctionBegin;
5848   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5849   PetscCallMPI(MPI_Comm_size(comm, &size));
5850 
5851   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5852              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5853   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5854   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5855 
5856   if (size == 1) {
5857     startsj_s = NULL;
5858     bufa_ptr  = NULL;
5859     *B_oth    = NULL;
5860     PetscFunctionReturn(PETSC_SUCCESS);
5861   }
5862 
5863   ctx = a->Mvctx;
5864   tag = ((PetscObject)ctx)->tag;
5865 
5866   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5867   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5868   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5869   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5870   PetscCall(PetscMalloc1(nreqs, &reqs));
5871   rwaits = reqs;
5872   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5873 
5874   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5875   if (scall == MAT_INITIAL_MATRIX) {
5876     /* i-array */
5877     /*  post receives */
5878     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5879     for (i = 0; i < nrecvs; i++) {
5880       rowlen = rvalues + rstarts[i] * rbs;
5881       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5882       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5883     }
5884 
5885     /* pack the outgoing message */
5886     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5887 
5888     sstartsj[0] = 0;
5889     rstartsj[0] = 0;
5890     len         = 0; /* total length of j or a array to be sent */
5891     if (nsends) {
5892       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5893       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5894     }
5895     for (i = 0; i < nsends; i++) {
5896       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5897       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5898       for (j = 0; j < nrows; j++) {
5899         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5900         for (l = 0; l < sbs; l++) {
5901           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5902 
5903           rowlen[j * sbs + l] = ncols;
5904 
5905           len += ncols;
5906           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5907         }
5908         k++;
5909       }
5910       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5911 
5912       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5913     }
5914     /* recvs and sends of i-array are completed */
5915     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5916     PetscCall(PetscFree(svalues));
5917 
5918     /* allocate buffers for sending j and a arrays */
5919     PetscCall(PetscMalloc1(len + 1, &bufj));
5920     PetscCall(PetscMalloc1(len + 1, &bufa));
5921 
5922     /* create i-array of B_oth */
5923     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5924 
5925     b_othi[0] = 0;
5926     len       = 0; /* total length of j or a array to be received */
5927     k         = 0;
5928     for (i = 0; i < nrecvs; i++) {
5929       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5930       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5931       for (j = 0; j < nrows; j++) {
5932         b_othi[k + 1] = b_othi[k] + rowlen[j];
5933         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5934         k++;
5935       }
5936       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5937     }
5938     PetscCall(PetscFree(rvalues));
5939 
5940     /* allocate space for j and a arrays of B_oth */
5941     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5942     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5943 
5944     /* j-array */
5945     /*  post receives of j-array */
5946     for (i = 0; i < nrecvs; i++) {
5947       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5948       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5949     }
5950 
5951     /* pack the outgoing message j-array */
5952     if (nsends) k = sstarts[0];
5953     for (i = 0; i < nsends; i++) {
5954       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5955       bufJ  = bufj + sstartsj[i];
5956       for (j = 0; j < nrows; j++) {
5957         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5958         for (ll = 0; ll < sbs; ll++) {
5959           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5960           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5961           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5962         }
5963       }
5964       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5965     }
5966 
5967     /* recvs and sends of j-array are completed */
5968     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5969   } else if (scall == MAT_REUSE_MATRIX) {
5970     sstartsj = *startsj_s;
5971     rstartsj = *startsj_r;
5972     bufa     = *bufa_ptr;
5973     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5974   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5975 
5976   /* a-array */
5977   /*  post receives of a-array */
5978   for (i = 0; i < nrecvs; i++) {
5979     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5980     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5981   }
5982 
5983   /* pack the outgoing message a-array */
5984   if (nsends) k = sstarts[0];
5985   for (i = 0; i < nsends; i++) {
5986     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5987     bufA  = bufa + sstartsj[i];
5988     for (j = 0; j < nrows; j++) {
5989       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5990       for (ll = 0; ll < sbs; ll++) {
5991         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5992         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5993         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5994       }
5995     }
5996     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5997   }
5998   /* recvs and sends of a-array are completed */
5999   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6000   PetscCall(PetscFree(reqs));
6001 
6002   if (scall == MAT_INITIAL_MATRIX) {
6003     Mat_SeqAIJ *b_oth;
6004 
6005     /* put together the new matrix */
6006     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6007 
6008     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6009     /* Since these are PETSc arrays, change flags to free them as necessary. */
6010     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6011     b_oth->free_a  = PETSC_TRUE;
6012     b_oth->free_ij = PETSC_TRUE;
6013     b_oth->nonew   = 0;
6014 
6015     PetscCall(PetscFree(bufj));
6016     if (!startsj_s || !bufa_ptr) {
6017       PetscCall(PetscFree2(sstartsj, rstartsj));
6018       PetscCall(PetscFree(bufa));
6019     } else {
6020       *startsj_s = sstartsj;
6021       *startsj_r = rstartsj;
6022       *bufa_ptr  = bufa;
6023     }
6024   } else if (scall == MAT_REUSE_MATRIX) {
6025     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6026   }
6027 
6028   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6029   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6030   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6031   PetscFunctionReturn(PETSC_SUCCESS);
6032 }
6033 
6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6037 #if defined(PETSC_HAVE_MKL_SPARSE)
6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6039 #endif
6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6042 #if defined(PETSC_HAVE_ELEMENTAL)
6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6044 #endif
6045 #if defined(PETSC_HAVE_SCALAPACK)
6046 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6047 #endif
6048 #if defined(PETSC_HAVE_HYPRE)
6049 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6050 #endif
6051 #if defined(PETSC_HAVE_CUDA)
6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6053 #endif
6054 #if defined(PETSC_HAVE_HIP)
6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6056 #endif
6057 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6059 #endif
6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6061 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6062 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6063 
6064 /*
6065     Computes (B'*A')' since computing B*A directly is untenable
6066 
6067                n                       p                          p
6068         [             ]       [             ]         [                 ]
6069       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6070         [             ]       [             ]         [                 ]
6071 
6072 */
6073 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6074 {
6075   Mat At, Bt, Ct;
6076 
6077   PetscFunctionBegin;
6078   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6079   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6080   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6081   PetscCall(MatDestroy(&At));
6082   PetscCall(MatDestroy(&Bt));
6083   PetscCall(MatTransposeSetPrecursor(Ct, C));
6084   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6085   PetscCall(MatDestroy(&Ct));
6086   PetscFunctionReturn(PETSC_SUCCESS);
6087 }
6088 
6089 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6090 {
6091   PetscBool cisdense;
6092 
6093   PetscFunctionBegin;
6094   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6095   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6096   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6097   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6098   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6099   PetscCall(MatSetUp(C));
6100 
6101   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6102   PetscFunctionReturn(PETSC_SUCCESS);
6103 }
6104 
6105 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6106 {
6107   Mat_Product *product = C->product;
6108   Mat          A = product->A, B = product->B;
6109 
6110   PetscFunctionBegin;
6111   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6112              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6113   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6114   C->ops->productsymbolic = MatProductSymbolic_AB;
6115   PetscFunctionReturn(PETSC_SUCCESS);
6116 }
6117 
6118 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6119 {
6120   Mat_Product *product = C->product;
6121 
6122   PetscFunctionBegin;
6123   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6124   PetscFunctionReturn(PETSC_SUCCESS);
6125 }
6126 
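/* A usage sketch (illustrative): a product of a parallel dense matrix and an MPIAIJ matrix is
   dispatched through the MATPRODUCT_AB path registered above. The matrix names D (MATMPIDENSE)
   and S (MATMPIAIJ) are assumptions of this sketch.

     Mat C;
     PetscCall(MatMatMult(D, S, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));
     PetscCall(MatDestroy(&C));
*/
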
6127 /*
6128    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6129 
6130   Input Parameters:
6131 
6132     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6133     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6134 
6135     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6136 
6137     For Set1, j1[] contains column indices of the nonzeros.
6138     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6139     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6140     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6141 
6142     Similar for Set2.
6143 
6144     This routine merges the two sets of nonzeros row by row and removes repeats.
6145 
6146   Output Parameters: (memory is allocated by the caller)
6147 
6148     i[],j[]: the CSR of the merged matrix, which has m rows.
6149     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6150     imap2[]: similar to imap1[], but for Set2.
6151     Note we order nonzeros row-by-row and from left to right.
6152 */
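/* A small worked example (illustrative) of the merge described above, for a single row (m = 1):

     Set1: j1 = {2, 2, 5},  rowBegin1 = {0}, rowEnd1 = {3}, jmap1 = {0, 2, 3}  (col 2 twice, col 5 once)
     Set2: j2 = {3, 5, 5},  rowBegin2 = {0}, rowEnd2 = {3}, jmap2 = {0, 1, 3}  (col 3 once, col 5 twice)

   The merged CSR is i = {0, 3}, j = {2, 3, 5}, and the unique nonzeros map to the merged
   positions as imap1 = {0, 2} and imap2 = {1, 2}.
*/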
6153 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6154 {
6155   PetscInt   r, m; /* Row index of mat */
6156   PetscCount t, t1, t2, b1, e1, b2, e2;
6157 
6158   PetscFunctionBegin;
6159   PetscCall(MatGetLocalSize(mat, &m, NULL));
6160   t1 = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6161   i[0]        = 0;
6162   for (r = 0; r < m; r++) { /* Do row by row merging */
6163     b1 = rowBegin1[r];
6164     e1 = rowEnd1[r];
6165     b2 = rowBegin2[r];
6166     e2 = rowEnd2[r];
6167     while (b1 < e1 && b2 < e2) {
6168       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6169         j[t]      = j1[b1];
6170         imap1[t1] = t;
6171         imap2[t2] = t;
6172         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6173         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6174         t1++;
6175         t2++;
6176         t++;
6177       } else if (j1[b1] < j2[b2]) {
6178         j[t]      = j1[b1];
6179         imap1[t1] = t;
6180         b1 += jmap1[t1 + 1] - jmap1[t1];
6181         t1++;
6182         t++;
6183       } else {
6184         j[t]      = j2[b2];
6185         imap2[t2] = t;
6186         b2 += jmap2[t2 + 1] - jmap2[t2];
6187         t2++;
6188         t++;
6189       }
6190     }
6191     /* Merge the remaining in either j1[] or j2[] */
6192     while (b1 < e1) {
6193       j[t]      = j1[b1];
6194       imap1[t1] = t;
6195       b1 += jmap1[t1 + 1] - jmap1[t1];
6196       t1++;
6197       t++;
6198     }
6199     while (b2 < e2) {
6200       j[t]      = j2[b2];
6201       imap2[t2] = t;
6202       b2 += jmap2[t2 + 1] - jmap2[t2];
6203       t2++;
6204       t++;
6205     }
6206     PetscCall(PetscIntCast(t, i + r + 1));
6207   }
6208   PetscFunctionReturn(PETSC_SUCCESS);
6209 }
6210 
6211 /*
6212   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6213 
6214   Input Parameters:
6215     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6216     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6217       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6218 
6219       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6220       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6221 
6222   Output Parameters:
6223     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6224     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6225       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6226       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6227 
6228     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6229       Atot: number of entries belonging to the diagonal block.
6230       Annz: number of unique nonzeros belonging to the diagonal block.
6231       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6232         repeats (i.e., same 'i,j' pair).
6233       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6234         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6238 
6239     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6240 
6241     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6242 */
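/* A small worked example (illustrative) of the split described above. Suppose the diagonal block
   owns columns [10, 20) and one local row receives the entries with column indices

     j = {25, 12, 25, 12, 3}

   After the in-place sort the row reads {12, 12 | 3, 25, 25}, i.e. rowMid - rowBegin = 2 and
   rowEnd - rowMid = 3. For this row the routine therefore counts Atot = 2, Annz = 1 with
   Ajmap = {0, 2}, and Btot = 3, Bnnz = 2 with Bjmap = {0, 1, 3}; Aperm/Bperm record the perm[]
   values of the corresponding entries so that repeated entries can be summed later.
*/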
6243 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6244 {
6245   PetscInt    cstart, cend, rstart, rend, row, col;
6246   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6247   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6248   PetscCount  k, m, p, q, r, s, mid;
6249   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6250 
6251   PetscFunctionBegin;
6252   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6253   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6254   m = rend - rstart;
6255 
6256   /* Skip negative rows */
6257   for (k = 0; k < n; k++)
6258     if (i[k] >= 0) break;
6259 
6260   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6261      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6262   */
6263   while (k < n) {
6264     row = i[k];
6265     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6266     for (s = k; s < n; s++)
6267       if (i[s] != row) break;
6268 
6269     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6270     for (p = k; p < s; p++) {
6271       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6272       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6273     }
6274     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6275     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6276     rowBegin[row - rstart] = k;
6277     rowMid[row - rstart]   = mid;
6278     rowEnd[row - rstart]   = s;
6279 
6280     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6281     Atot += mid - k;
6282     Btot += s - mid;
6283 
6284     /* Count unique nonzeros of this diag row */
6285     for (p = k; p < mid;) {
6286       col = j[p];
6287       do {
6288         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6289         p++;
6290       } while (p < mid && j[p] == col);
6291       Annz++;
6292     }
6293 
6294     /* Count unique nonzeros of this offdiag row */
6295     for (p = mid; p < s;) {
6296       col = j[p];
6297       do {
6298         p++;
6299       } while (p < s && j[p] == col);
6300       Bnnz++;
6301     }
6302     k = s;
6303   }
6304 
6305   /* Allocation according to Atot, Btot, Annz, Bnnz */
6306   PetscCall(PetscMalloc1(Atot, &Aperm));
6307   PetscCall(PetscMalloc1(Btot, &Bperm));
6308   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6309   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6310 
6311   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6312   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6313   for (r = 0; r < m; r++) {
6314     k   = rowBegin[r];
6315     mid = rowMid[r];
6316     s   = rowEnd[r];
6317     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6318     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6319     Atot += mid - k;
6320     Btot += s - mid;
6321 
6322     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6323     for (p = k; p < mid;) {
6324       col = j[p];
6325       q   = p;
6326       do {
6327         p++;
6328       } while (p < mid && j[p] == col);
6329       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6330       Annz++;
6331     }
6332 
6333     for (p = mid; p < s;) {
6334       col = j[p];
6335       q   = p;
6336       do {
6337         p++;
6338       } while (p < s && j[p] == col);
6339       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6340       Bnnz++;
6341     }
6342   }
6343   /* Output */
6344   *Aperm_ = Aperm;
6345   *Annz_  = Annz;
6346   *Atot_  = Atot;
6347   *Ajmap_ = Ajmap;
6348   *Bperm_ = Bperm;
6349   *Bnnz_  = Bnnz;
6350   *Btot_  = Btot;
6351   *Bjmap_ = Bjmap;
6352   PetscFunctionReturn(PETSC_SUCCESS);
6353 }
6354 
6355 /*
6356   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6357 
6358   Input Parameters:
6359     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6360     nnz:  number of unique nonzeros in the merged matrix
6361     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6362     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6363 
6364   Output Parameter: (memory is allocated by the caller)
6365     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6366 
6367   Example:
6368     nnz1 = 4
6369     nnz  = 6
6370     imap = [1,3,4,5]
6371     jmap = [0,3,5,6,7]
6372    then,
6373     jmap_new = [0,0,3,3,5,6,7]
6374 */
6375 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6376 {
6377   PetscCount k, p;
6378 
6379   PetscFunctionBegin;
6380   jmap_new[0] = 0;
6381   p           = nnz;                /* p loops over jmap_new[] backwards */
6382   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6383     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6384   }
6385   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6386   PetscFunctionReturn(PETSC_SUCCESS);
6387 }
6388 
6389 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6390 {
6391   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6392 
6393   PetscFunctionBegin;
6394   PetscCall(PetscSFDestroy(&coo->sf));
6395   PetscCall(PetscFree(coo->Aperm1));
6396   PetscCall(PetscFree(coo->Bperm1));
6397   PetscCall(PetscFree(coo->Ajmap1));
6398   PetscCall(PetscFree(coo->Bjmap1));
6399   PetscCall(PetscFree(coo->Aimap2));
6400   PetscCall(PetscFree(coo->Bimap2));
6401   PetscCall(PetscFree(coo->Aperm2));
6402   PetscCall(PetscFree(coo->Bperm2));
6403   PetscCall(PetscFree(coo->Ajmap2));
6404   PetscCall(PetscFree(coo->Bjmap2));
6405   PetscCall(PetscFree(coo->Cperm1));
6406   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6407   PetscCall(PetscFree(coo));
6408   PetscFunctionReturn(PETSC_SUCCESS);
6409 }
6410 
6411 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6412 {
6413   MPI_Comm             comm;
6414   PetscMPIInt          rank, size;
6415   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6416   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6417   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6418   PetscContainer       container;
6419   MatCOOStruct_MPIAIJ *coo;
6420 
6421   PetscFunctionBegin;
6422   PetscCall(PetscFree(mpiaij->garray));
6423   PetscCall(VecDestroy(&mpiaij->lvec));
6424 #if defined(PETSC_USE_CTABLE)
6425   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6426 #else
6427   PetscCall(PetscFree(mpiaij->colmap));
6428 #endif
6429   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6430   mat->assembled     = PETSC_FALSE;
6431   mat->was_assembled = PETSC_FALSE;
6432 
6433   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6434   PetscCallMPI(MPI_Comm_size(comm, &size));
6435   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6436   PetscCall(PetscLayoutSetUp(mat->rmap));
6437   PetscCall(PetscLayoutSetUp(mat->cmap));
6438   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6439   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6440   PetscCall(MatGetLocalSize(mat, &m, &n));
6441   PetscCall(MatGetSize(mat, &M, &N));
6442 
6443   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6444   /* entries come first, then local rows, then remote rows.                     */
6445   PetscCount n1 = coo_n, *perm1;
6446   PetscInt  *i1 = coo_i, *j1 = coo_j;
6447 
6448   PetscCall(PetscMalloc1(n1, &perm1));
6449   for (k = 0; k < n1; k++) perm1[k] = k;
6450 
6451   /* Manipulate indices so that entries with negative row or col indices will have smallest
6452      row indices, local entries will have greater but negative row indices, and remote entries
6453      will have positive row indices.
6454   */
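  /* For illustration (hypothetical values): with rstart = 3 and rend = 6, an entry with i1[k] = 4 (a local row) becomes
     4 - PETSC_INT_MAX (negative, but larger than PETSC_INT_MIN), an entry with i1[k] = 10 (a remote row) keeps 10, and an
     entry with i1[k] < 0 or j1[k] < 0 becomes PETSC_INT_MIN, so the sort below orders ignored, then local, then remote. */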
6455   for (k = 0; k < n1; k++) {
6456     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6457     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6458     else {
6459       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but entries are being inserted into remote rows");
6460       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6461     }
6462   }
6463 
6464   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6465   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6466 
6467   /* Advance k to the first entry we need to take care of */
6468   for (k = 0; k < n1; k++)
6469     if (i1[k] > PETSC_INT_MIN) break;
6470   PetscCount i1start = k;
6471 
6472   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6473   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows */
6474 
6475   /*           Send remote rows to their owner                                  */
6476   /* Find which rows should be sent to which remote ranks*/
6477   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6478   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6479   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6480   const PetscInt *ranges;
6481   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6482 
6483   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6484   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6485   for (k = rem; k < n1;) {
6486     PetscMPIInt owner;
6487     PetscInt    firstRow, lastRow;
6488 
6489     /* Locate a row range */
6490     firstRow = i1[k]; /* first row of this owner */
6491     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6492     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6493 
6494     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6495     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6496 
6497     /* All entries in [k,p) belong to this remote owner */
6498     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6499       PetscMPIInt *sendto2;
6500       PetscInt    *nentries2;
6501       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6502 
6503       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6504       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6505       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6506       PetscCall(PetscFree2(sendto, nentries));
6507       sendto   = sendto2;
6508       nentries = nentries2;
6509       maxNsend = maxNsend2;
6510     }
6511     sendto[nsend] = owner;
6512     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6513     nsend++;
6514     k = p;
6515   }
6516 
6517   /* Build 1st SF to know offsets on remote to send data */
6518   PetscSF      sf1;
6519   PetscInt     nroots = 1, nroots2 = 0;
6520   PetscInt     nleaves = nsend, nleaves2 = 0;
6521   PetscInt    *offsets;
6522   PetscSFNode *iremote;
6523 
6524   PetscCall(PetscSFCreate(comm, &sf1));
6525   PetscCall(PetscMalloc1(nsend, &iremote));
6526   PetscCall(PetscMalloc1(nsend, &offsets));
6527   for (k = 0; k < nsend; k++) {
6528     iremote[k].rank  = sendto[k];
6529     iremote[k].index = 0;
6530     nleaves2 += nentries[k];
6531     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6532   }
6533   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6534   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6535   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, the offsets[] check below catches it */
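  /* After the fetch-and-op, nroots2 on each rank is the total number of entries it will receive, and offsets[k] is the
     starting slot on rank sendto[k] where this rank's nentries[k] entries will land. Illustration (hypothetical): if
     rank 1 sends 3 entries and rank 2 sends 5 entries to rank 0, then rank 0 ends with nroots2 = 8, and the two senders
     obtain offsets 0 and 3 respectively (or 5 and 0, if rank 2's update happens to be applied first). */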
6536   PetscCall(PetscSFDestroy(&sf1));
6537   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6538 
6539   /* Build 2nd SF to send remote COOs to their owner */
6540   PetscSF sf2;
6541   nroots  = nroots2;
6542   nleaves = nleaves2;
6543   PetscCall(PetscSFCreate(comm, &sf2));
6544   PetscCall(PetscSFSetFromOptions(sf2));
6545   PetscCall(PetscMalloc1(nleaves, &iremote));
6546   p = 0;
6547   for (k = 0; k < nsend; k++) {
6548     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6549     for (q = 0; q < nentries[k]; q++, p++) {
6550       iremote[p].rank = sendto[k];
6551       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6552     }
6553   }
6554   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6555 
6556   /* Send the remote COOs to their owner */
6557   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6558   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6559   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6560   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6561   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6562   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6563   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6564   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6565   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6566   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6567   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6568 
6569   PetscCall(PetscFree(offsets));
6570   PetscCall(PetscFree2(sendto, nentries));
6571 
6572   /* Sort received COOs by row along with the permutation array     */
6573   for (k = 0; k < n2; k++) perm2[k] = k;
6574   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6575 
6576   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6577   PetscCount *Cperm1;
6578   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6579   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6580   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6581   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
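  /* Illustration (hypothetical): if the remote part of perm1[] is {7,2,9}, then Cperm1 = {7,2,9}, and in
     MatSetValuesCOO_MPIAIJ() sendbuf[] will be packed as v[7], v[2], v[9] before being reduced onto the owners. */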
6582 
6583   /* Support for HYPRE matrices, kind of a hack.
6584      Swap min column with diagonal so that diagonal values will go first */
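  /* Illustration (made-up row): if local row r has diagonal-block columns {cstart+5, cstart+r, cstart+9} with r > 5,
     then minj[r] = cstart+5 and the swap below relabels the entry at column cstart+r as column cstart+5 and the entry
     at column cstart+5 as column cstart+r, so once the row is sorted the diagonal value sits first within the row. */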
6585   PetscBool hypre;
6586   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6587   if (hypre) {
6588     PetscInt *minj;
6589     PetscBT   hasdiag;
6590 
6591     PetscCall(PetscBTCreate(m, &hasdiag));
6592     PetscCall(PetscMalloc1(m, &minj));
6593     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6594     for (k = i1start; k < rem; k++) {
6595       if (j1[k] < cstart || j1[k] >= cend) continue;
6596       const PetscInt rindex = i1[k] - rstart;
6597       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6598       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6599     }
6600     for (k = 0; k < n2; k++) {
6601       if (j2[k] < cstart || j2[k] >= cend) continue;
6602       const PetscInt rindex = i2[k] - rstart;
6603       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6604       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6605     }
6606     for (k = i1start; k < rem; k++) {
6607       const PetscInt rindex = i1[k] - rstart;
6608       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6609       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6610       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6611     }
6612     for (k = 0; k < n2; k++) {
6613       const PetscInt rindex = i2[k] - rstart;
6614       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6615       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6616       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6617     }
6618     PetscCall(PetscBTDestroy(&hasdiag));
6619     PetscCall(PetscFree(minj));
6620   }
6621 
6622   /* Split local COOs and received COOs into diag/offdiag portions */
6623   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6624   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6625   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6626   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6627   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6628   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6629 
6630   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6631   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6632   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6633   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6634 
6635   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6636   PetscInt *Ai, *Bi;
6637   PetscInt *Aj, *Bj;
6638 
6639   PetscCall(PetscMalloc1(m + 1, &Ai));
6640   PetscCall(PetscMalloc1(m + 1, &Bi));
6641   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6642   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6643 
6644   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6645   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6646   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6647   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6648   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6649 
6650   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6651   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6652 
6653   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6654   /* expect nonzeros in A/B most likely have local contributing entries        */
6655   PetscInt    Annz = Ai[m];
6656   PetscInt    Bnnz = Bi[m];
6657   PetscCount *Ajmap1_new, *Bjmap1_new;
6658 
6659   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6660   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6661 
6662   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6663   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6664 
6665   PetscCall(PetscFree(Aimap1));
6666   PetscCall(PetscFree(Ajmap1));
6667   PetscCall(PetscFree(Bimap1));
6668   PetscCall(PetscFree(Bjmap1));
6669   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6670   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6671   PetscCall(PetscFree(perm1));
6672   PetscCall(PetscFree3(i2, j2, perm2));
6673 
6674   Ajmap1 = Ajmap1_new;
6675   Bjmap1 = Bjmap1_new;
6676 
6677   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6678   if (Annz < Annz1 + Annz2) {
6679     PetscInt *Aj_new;
6680     PetscCall(PetscMalloc1(Annz, &Aj_new));
6681     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6682     PetscCall(PetscFree(Aj));
6683     Aj = Aj_new;
6684   }
6685 
6686   if (Bnnz < Bnnz1 + Bnnz2) {
6687     PetscInt *Bj_new;
6688     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6689     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6690     PetscCall(PetscFree(Bj));
6691     Bj = Bj_new;
6692   }
6693 
6694   /* Create new submatrices for on-process and off-process coupling                  */
6695   PetscScalar     *Aa, *Ba;
6696   MatType          rtype;
6697   Mat_SeqAIJ      *a, *b;
6698   PetscObjectState state;
6699   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6700   PetscCall(PetscCalloc1(Bnnz, &Ba));
6701   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6702   if (cstart) {
6703     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6704   }
6705 
6706   PetscCall(MatGetRootType_Private(mat, &rtype));
6707 
6708   MatSeqXAIJGetOptions_Private(mpiaij->A);
6709   PetscCall(MatDestroy(&mpiaij->A));
6710   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6711   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6712   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6713 
6714   MatSeqXAIJGetOptions_Private(mpiaij->B);
6715   PetscCall(MatDestroy(&mpiaij->B));
6716   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6717   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6718   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6719 
6720   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6721   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6722   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6723   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6724 
6725   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6726   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6727   a->free_a  = PETSC_TRUE;
6728   a->free_ij = PETSC_TRUE;
6729   b->free_a  = PETSC_TRUE;
6730   b->free_ij = PETSC_TRUE;
6731   a->maxnz   = a->nz;
6732   b->maxnz   = b->nz;
6733 
6734   /* conversion must happen AFTER multiply setup */
6735   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6736   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6737   PetscCall(VecDestroy(&mpiaij->lvec));
6738   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6739 
6740   // Put the COO struct in a container and then attach that to the matrix
6741   PetscCall(PetscMalloc1(1, &coo));
6742   coo->n       = coo_n;
6743   coo->sf      = sf2;
6744   coo->sendlen = nleaves;
6745   coo->recvlen = nroots;
6746   coo->Annz    = Annz;
6747   coo->Bnnz    = Bnnz;
6748   coo->Annz2   = Annz2;
6749   coo->Bnnz2   = Bnnz2;
6750   coo->Atot1   = Atot1;
6751   coo->Atot2   = Atot2;
6752   coo->Btot1   = Btot1;
6753   coo->Btot2   = Btot2;
6754   coo->Ajmap1  = Ajmap1;
6755   coo->Aperm1  = Aperm1;
6756   coo->Bjmap1  = Bjmap1;
6757   coo->Bperm1  = Bperm1;
6758   coo->Aimap2  = Aimap2;
6759   coo->Ajmap2  = Ajmap2;
6760   coo->Aperm2  = Aperm2;
6761   coo->Bimap2  = Bimap2;
6762   coo->Bjmap2  = Bjmap2;
6763   coo->Bperm2  = Bperm2;
6764   coo->Cperm1  = Cperm1;
6765   // Allocate in preallocation. If not used, it has zero cost on host
6766   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6767   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6768   PetscCall(PetscContainerSetPointer(container, coo));
6769   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6770   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6771   PetscCall(PetscContainerDestroy(&container));
6772   PetscFunctionReturn(PETSC_SUCCESS);
6773 }
6774 
6775 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6776 {
6777   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6778   Mat                  A = mpiaij->A, B = mpiaij->B;
6779   PetscScalar         *Aa, *Ba;
6780   PetscScalar         *sendbuf, *recvbuf;
6781   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6782   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6783   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6784   const PetscCount    *Cperm1;
6785   PetscContainer       container;
6786   MatCOOStruct_MPIAIJ *coo;
6787 
6788   PetscFunctionBegin;
6789   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6790   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6791   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6792   sendbuf = coo->sendbuf;
6793   recvbuf = coo->recvbuf;
6794   Ajmap1  = coo->Ajmap1;
6795   Ajmap2  = coo->Ajmap2;
6796   Aimap2  = coo->Aimap2;
6797   Bjmap1  = coo->Bjmap1;
6798   Bjmap2  = coo->Bjmap2;
6799   Bimap2  = coo->Bimap2;
6800   Aperm1  = coo->Aperm1;
6801   Aperm2  = coo->Aperm2;
6802   Bperm1  = coo->Bperm1;
6803   Bperm2  = coo->Bperm2;
6804   Cperm1  = coo->Cperm1;
6805 
6806   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6807   PetscCall(MatSeqAIJGetArray(B, &Ba));
6808 
6809   /* Pack entries to be sent to remote */
6810   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6811 
6812   /* Send remote entries to their owner and overlap the communication with local computation */
6813   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6814   /* Add local entries to A and B */
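  /* Illustration (hypothetical maps): with Ajmap1 = {0,2,3} and Aperm1 = {5,9,0}, nonzero 0 of A accumulates
     v[5] + v[9] and nonzero 1 accumulates v[0]; with INSERT_VALUES the previous Aa[i] is discarded first. */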
6815   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6816     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6817     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6818     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6819   }
6820   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6821     PetscScalar sum = 0.0;
6822     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6823     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6824   }
6825   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6826 
6827   /* Add received remote entries to A and B */
6828   for (PetscCount i = 0; i < coo->Annz2; i++) {
6829     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6830   }
6831   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6832     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6833   }
6834   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6835   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6836   PetscFunctionReturn(PETSC_SUCCESS);
6837 }
6838 
6839 /*MC
6840    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6841 
6842    Options Database Keys:
6843 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6844 
6845    Level: beginner
6846 
6847    Notes:
6848    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6849     in this case the values associated with the rows and columns one passes in are set to zero
6850     in the matrix
6851 
6852     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6853     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6854 
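   Example Usage (an illustrative sketch; `N` and the preallocation counts are placeholders):
.vb
   Mat A;
   MatCreate(comm, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N);
   MatSetType(A, MATMPIAIJ); /* or -mat_type mpiaij together with MatSetFromOptions() */
   MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL); /* about 5 nonzeros per row in the diagonal block, 2 elsewhere */
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
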
6855 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6856 M*/
6857 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6858 {
6859   Mat_MPIAIJ *b;
6860   PetscMPIInt size;
6861 
6862   PetscFunctionBegin;
6863   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6864 
6865   PetscCall(PetscNew(&b));
6866   B->data       = (void *)b;
6867   B->ops[0]     = MatOps_Values;
6868   B->assembled  = PETSC_FALSE;
6869   B->insertmode = NOT_SET_VALUES;
6870   b->size       = size;
6871 
6872   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6873 
6874   /* build cache for off array entries formed */
6875   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6876 
6877   b->donotstash  = PETSC_FALSE;
6878   b->colmap      = NULL;
6879   b->garray      = NULL;
6880   b->roworiented = PETSC_TRUE;
6881 
6882   /* stuff used for matrix vector multiply */
6883   b->lvec  = NULL;
6884   b->Mvctx = NULL;
6885 
6886   /* stuff for MatGetRow() */
6887   b->rowindices   = NULL;
6888   b->rowvalues    = NULL;
6889   b->getrowactive = PETSC_FALSE;
6890 
6891   /* flexible pointer used in CUSPARSE classes */
6892   b->spptr = NULL;
6893 
6894   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6895   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6896   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6897   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6898   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6900   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6901   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6903   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6904 #if defined(PETSC_HAVE_CUDA)
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6906 #endif
6907 #if defined(PETSC_HAVE_HIP)
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6909 #endif
6910 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6912 #endif
6913 #if defined(PETSC_HAVE_MKL_SPARSE)
6914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6915 #endif
6916   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6917   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6919   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6920 #if defined(PETSC_HAVE_ELEMENTAL)
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6922 #endif
6923 #if defined(PETSC_HAVE_SCALAPACK)
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6925 #endif
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6928 #if defined(PETSC_HAVE_HYPRE)
6929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6931 #endif
6932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6936   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6937   PetscFunctionReturn(PETSC_SUCCESS);
6938 }
6939 
6940 /*@
6941   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6942   and "off-diagonal" part of the matrix in CSR format.
6943 
6944   Collective
6945 
6946   Input Parameters:
6947 + comm - MPI communicator
6948 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6949 . n    - This value should be the same as the local size used in creating the
6950          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
6951          calculated if `N` is given). For square matrices `n` is almost always `m`.
6952 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6953 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6954 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6955 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6956 . a    - matrix values
6957 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6958 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6959 - oa   - matrix values
6960 
6961   Output Parameter:
6962 . mat - the matrix
6963 
6964   Level: advanced
6965 
6966   Notes:
6967   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6968   must free the arrays once the matrix has been destroyed and not before.
6969 
6970   The `i` and `j` indices are 0 based
6971 
6972   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6973 
6974   This sets local rows and cannot be used to set off-processor values.
6975 
6976   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6977   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6978   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6979   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6980   keep track of the underlying arrays. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6981   communication if it is known that only local entries will be set.
6982 
6983 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
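  For illustration only (hypothetical data), suppose a rank owns rows {0,1} and diagonal-block columns {0,1,2} of a
  matrix with 6 global columns, and its entries are a(0,0)=1, a(0,2)=2, a(0,4)=3, a(1,1)=4, a(1,3)=5 in global
  numbering. The split arrays passed to this routine would then be
.vb
  i  = {0,2,3},  j  = {0,2,1},  a  = {1,2,4}    (diagonal block, local column indices)
  oi = {0,1,2},  oj = {4,3},    oa = {3,5}      (off-diagonal block, global column indices)
.ve
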
6984           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6985 @*/
6986 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6987 {
6988   Mat_MPIAIJ *maij;
6989 
6990   PetscFunctionBegin;
6991   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6992   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6993   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6994   PetscCall(MatCreate(comm, mat));
6995   PetscCall(MatSetSizes(*mat, m, n, M, N));
6996   PetscCall(MatSetType(*mat, MATMPIAIJ));
6997   maij = (Mat_MPIAIJ *)(*mat)->data;
6998 
6999   (*mat)->preallocated = PETSC_TRUE;
7000 
7001   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7002   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7003 
7004   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7005   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7006 
7007   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7008   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7009   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7010   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7011   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7012   PetscFunctionReturn(PETSC_SUCCESS);
7013 }
7014 
7015 typedef struct {
7016   Mat       *mp;    /* intermediate products */
7017   PetscBool *mptmp; /* is the intermediate product temporary ? */
7018   PetscInt   cp;    /* number of intermediate products */
7019 
7020   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7021   PetscInt    *startsj_s, *startsj_r;
7022   PetscScalar *bufa;
7023   Mat          P_oth;
7024 
7025   /* may take advantage of merging product->B */
7026   Mat Bloc; /* B-local by merging diag and off-diag */
7027 
7028   /* cusparse does not support splitting the symbolic and numeric phases.
7029      When api_user is true, we don't need to update the numerical values
7030      of the temporary storage */
7031   PetscBool reusesym;
7032 
7033   /* support for COO values insertion */
7034   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7035   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7036   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7037   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7038   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7039   PetscMemType mtype;
7040 
7041   /* customization */
7042   PetscBool abmerge;
7043   PetscBool P_oth_bind;
7044 } MatMatMPIAIJBACKEND;
7045 
7046 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7047 {
7048   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7049   PetscInt             i;
7050 
7051   PetscFunctionBegin;
7052   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7053   PetscCall(PetscFree(mmdata->bufa));
7054   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7055   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7056   PetscCall(MatDestroy(&mmdata->P_oth));
7057   PetscCall(MatDestroy(&mmdata->Bloc));
7058   PetscCall(PetscSFDestroy(&mmdata->sf));
7059   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7060   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7061   PetscCall(PetscFree(mmdata->own[0]));
7062   PetscCall(PetscFree(mmdata->own));
7063   PetscCall(PetscFree(mmdata->off[0]));
7064   PetscCall(PetscFree(mmdata->off));
7065   PetscCall(PetscFree(mmdata));
7066   PetscFunctionReturn(PETSC_SUCCESS);
7067 }
7068 
7069 /* Copy selected n entries with indices in idx[] of A to v[].
7070    If idx is NULL, copy the whole data array of A to v[]
7071  */
7072 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7073 {
7074   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7075 
7076   PetscFunctionBegin;
7077   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7078   if (f) {
7079     PetscCall((*f)(A, n, idx, v));
7080   } else {
7081     const PetscScalar *vv;
7082 
7083     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7084     if (n && idx) {
7085       PetscScalar    *w  = v;
7086       const PetscInt *oi = idx;
7087       PetscInt        j;
7088 
7089       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7090     } else {
7091       PetscCall(PetscArraycpy(v, vv, n));
7092     }
7093     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7094   }
7095   PetscFunctionReturn(PETSC_SUCCESS);
7096 }
7097 
7098 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7099 {
7100   MatMatMPIAIJBACKEND *mmdata;
7101   PetscInt             i, n_d, n_o;
7102 
7103   PetscFunctionBegin;
7104   MatCheckProduct(C, 1);
7105   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7106   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7107   if (!mmdata->reusesym) { /* update temporary matrices */
7108     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7109     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7110   }
7111   mmdata->reusesym = PETSC_FALSE;
7112 
7113   for (i = 0; i < mmdata->cp; i++) {
7114     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7115     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7116   }
7117   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7118     PetscInt noff;
7119 
7120     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7121     if (mmdata->mptmp[i]) continue;
7122     if (noff) {
7123       PetscInt nown;
7124 
7125       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7126       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7127       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7128       n_o += noff;
7129       n_d += nown;
7130     } else {
7131       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7132 
7133       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7134       n_d += mm->nz;
7135     }
7136   }
7137   if (mmdata->hasoffproc) { /* offprocess insertion */
7138     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7139     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7140   }
7141   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7142   PetscFunctionReturn(PETSC_SUCCESS);
7143 }
7144 
7145 /* Support for Pt * A, A * P, or Pt * A * P */
7146 #define MAX_NUMBER_INTERMEDIATE 4
7147 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7148 {
7149   Mat_Product           *product = C->product;
7150   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7151   Mat_MPIAIJ            *a, *p;
7152   MatMatMPIAIJBACKEND   *mmdata;
7153   ISLocalToGlobalMapping P_oth_l2g = NULL;
7154   IS                     glob      = NULL;
7155   const char            *prefix;
7156   char                   pprefix[256];
7157   const PetscInt        *globidx, *P_oth_idx;
7158   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7159   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7160   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7161                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7162                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7163   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7164 
7165   MatProductType ptype;
7166   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7167   PetscMPIInt    size;
7168 
7169   PetscFunctionBegin;
7170   MatCheckProduct(C, 1);
7171   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7172   ptype = product->type;
7173   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7174     ptype                                          = MATPRODUCT_AB;
7175     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7176   }
7177   switch (ptype) {
7178   case MATPRODUCT_AB:
7179     A          = product->A;
7180     P          = product->B;
7181     m          = A->rmap->n;
7182     n          = P->cmap->n;
7183     M          = A->rmap->N;
7184     N          = P->cmap->N;
7185     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7186     break;
7187   case MATPRODUCT_AtB:
7188     P          = product->A;
7189     A          = product->B;
7190     m          = P->cmap->n;
7191     n          = A->cmap->n;
7192     M          = P->cmap->N;
7193     N          = A->cmap->N;
7194     hasoffproc = PETSC_TRUE;
7195     break;
7196   case MATPRODUCT_PtAP:
7197     A          = product->A;
7198     P          = product->B;
7199     m          = P->cmap->n;
7200     n          = P->cmap->n;
7201     M          = P->cmap->N;
7202     N          = P->cmap->N;
7203     hasoffproc = PETSC_TRUE;
7204     break;
7205   default:
7206     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7207   }
7208   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7209   if (size == 1) hasoffproc = PETSC_FALSE;
7210 
7211   /* defaults */
7212   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7213     mp[i]    = NULL;
7214     mptmp[i] = PETSC_FALSE;
7215     rmapt[i] = -1;
7216     cmapt[i] = -1;
7217     rmapa[i] = NULL;
7218     cmapa[i] = NULL;
7219   }
7220 
7221   /* customization */
7222   PetscCall(PetscNew(&mmdata));
7223   mmdata->reusesym = product->api_user;
7224   if (ptype == MATPRODUCT_AB) {
7225     if (product->api_user) {
7226       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7227       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7228       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7229       PetscOptionsEnd();
7230     } else {
7231       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7232       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7233       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7234       PetscOptionsEnd();
7235     }
7236   } else if (ptype == MATPRODUCT_PtAP) {
7237     if (product->api_user) {
7238       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7239       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7240       PetscOptionsEnd();
7241     } else {
7242       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7243       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7244       PetscOptionsEnd();
7245     }
7246   }
7247   a = (Mat_MPIAIJ *)A->data;
7248   p = (Mat_MPIAIJ *)P->data;
7249   PetscCall(MatSetSizes(C, m, n, M, N));
7250   PetscCall(PetscLayoutSetUp(C->rmap));
7251   PetscCall(PetscLayoutSetUp(C->cmap));
7252   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7253   PetscCall(MatGetOptionsPrefix(C, &prefix));
7254 
7255   cp = 0;
7256   switch (ptype) {
7257   case MATPRODUCT_AB: /* A * P */
7258     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7259 
7260     /* A_diag * P_local (merged or not) */
7261     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7262       /* P is product->B */
7263       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7264       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7265       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7266       PetscCall(MatProductSetFill(mp[cp], product->fill));
7267       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7268       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7269       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7270       mp[cp]->product->api_user = product->api_user;
7271       PetscCall(MatProductSetFromOptions(mp[cp]));
7272       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7273       PetscCall(ISGetIndices(glob, &globidx));
7274       rmapt[cp] = 1;
7275       cmapt[cp] = 2;
7276       cmapa[cp] = globidx;
7277       mptmp[cp] = PETSC_FALSE;
7278       cp++;
7279     } else { /* A_diag * P_diag and A_diag * P_off */
7280       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7281       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7282       PetscCall(MatProductSetFill(mp[cp], product->fill));
7283       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7284       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7285       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7286       mp[cp]->product->api_user = product->api_user;
7287       PetscCall(MatProductSetFromOptions(mp[cp]));
7288       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7289       rmapt[cp] = 1;
7290       cmapt[cp] = 1;
7291       mptmp[cp] = PETSC_FALSE;
7292       cp++;
7293       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7294       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7295       PetscCall(MatProductSetFill(mp[cp], product->fill));
7296       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7297       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7298       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7299       mp[cp]->product->api_user = product->api_user;
7300       PetscCall(MatProductSetFromOptions(mp[cp]));
7301       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7302       rmapt[cp] = 1;
7303       cmapt[cp] = 2;
7304       cmapa[cp] = p->garray;
7305       mptmp[cp] = PETSC_FALSE;
7306       cp++;
7307     }
7308 
7309     /* A_off * P_other */
7310     if (mmdata->P_oth) {
7311       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7312       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7313       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7314       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7315       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7316       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7317       PetscCall(MatProductSetFill(mp[cp], product->fill));
7318       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7319       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7320       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7321       mp[cp]->product->api_user = product->api_user;
7322       PetscCall(MatProductSetFromOptions(mp[cp]));
7323       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7324       rmapt[cp] = 1;
7325       cmapt[cp] = 2;
7326       cmapa[cp] = P_oth_idx;
7327       mptmp[cp] = PETSC_FALSE;
7328       cp++;
7329     }
7330     break;
7331 
7332   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7333     /* A is product->B */
7334     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7335     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7336       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7337       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7338       PetscCall(MatProductSetFill(mp[cp], product->fill));
7339       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7340       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7341       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7342       mp[cp]->product->api_user = product->api_user;
7343       PetscCall(MatProductSetFromOptions(mp[cp]));
7344       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7345       PetscCall(ISGetIndices(glob, &globidx));
7346       rmapt[cp] = 2;
7347       rmapa[cp] = globidx;
7348       cmapt[cp] = 2;
7349       cmapa[cp] = globidx;
7350       mptmp[cp] = PETSC_FALSE;
7351       cp++;
7352     } else {
7353       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7354       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7355       PetscCall(MatProductSetFill(mp[cp], product->fill));
7356       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7357       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7358       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7359       mp[cp]->product->api_user = product->api_user;
7360       PetscCall(MatProductSetFromOptions(mp[cp]));
7361       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7362       PetscCall(ISGetIndices(glob, &globidx));
7363       rmapt[cp] = 1;
7364       cmapt[cp] = 2;
7365       cmapa[cp] = globidx;
7366       mptmp[cp] = PETSC_FALSE;
7367       cp++;
7368       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7369       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7370       PetscCall(MatProductSetFill(mp[cp], product->fill));
7371       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7372       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7373       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7374       mp[cp]->product->api_user = product->api_user;
7375       PetscCall(MatProductSetFromOptions(mp[cp]));
7376       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7377       rmapt[cp] = 2;
7378       rmapa[cp] = p->garray;
7379       cmapt[cp] = 2;
7380       cmapa[cp] = globidx;
7381       mptmp[cp] = PETSC_FALSE;
7382       cp++;
7383     }
7384     break;
7385   case MATPRODUCT_PtAP:
7386     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7387     /* P is product->B */
7388     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7389     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7390     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7391     PetscCall(MatProductSetFill(mp[cp], product->fill));
7392     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7393     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7394     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7395     mp[cp]->product->api_user = product->api_user;
7396     PetscCall(MatProductSetFromOptions(mp[cp]));
7397     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7398     PetscCall(ISGetIndices(glob, &globidx));
7399     rmapt[cp] = 2;
7400     rmapa[cp] = globidx;
7401     cmapt[cp] = 2;
7402     cmapa[cp] = globidx;
7403     mptmp[cp] = PETSC_FALSE;
7404     cp++;
7405     if (mmdata->P_oth) {
7406       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7407       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7408       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7409       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7410       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7411       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7412       PetscCall(MatProductSetFill(mp[cp], product->fill));
7413       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7414       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7415       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7416       mp[cp]->product->api_user = product->api_user;
7417       PetscCall(MatProductSetFromOptions(mp[cp]));
7418       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7419       mptmp[cp] = PETSC_TRUE;
7420       cp++;
7421       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7422       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7423       PetscCall(MatProductSetFill(mp[cp], product->fill));
7424       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7425       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7426       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7427       mp[cp]->product->api_user = product->api_user;
7428       PetscCall(MatProductSetFromOptions(mp[cp]));
7429       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7430       rmapt[cp] = 2;
7431       rmapa[cp] = globidx;
7432       cmapt[cp] = 2;
7433       cmapa[cp] = P_oth_idx;
7434       mptmp[cp] = PETSC_FALSE;
7435       cp++;
7436     }
7437     break;
7438   default:
7439     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7440   }
7441   /* sanity check */
7442   if (size > 1)
7443     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7444 
7445   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7446   for (i = 0; i < cp; i++) {
7447     mmdata->mp[i]    = mp[i];
7448     mmdata->mptmp[i] = mptmp[i];
7449   }
7450   mmdata->cp             = cp;
7451   C->product->data       = mmdata;
7452   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7453   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7454 
7455   /* memory type */
7456   mmdata->mtype = PETSC_MEMTYPE_HOST;
7457   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7458   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7459   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7460   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7461   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7462   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7463 
7464   /* prepare coo coordinates for values insertion */
7465 
7466   /* count total nonzeros of those intermediate seqaij Mats
7467     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7468     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be sent to and inserted on remote procs
7469     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7470   */
7471   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7472     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7473     if (mptmp[cp]) continue;
7474     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7475       const PetscInt *rmap = rmapa[cp];
7476       const PetscInt  mr   = mp[cp]->rmap->n;
7477       const PetscInt  rs   = C->rmap->rstart;
7478       const PetscInt  re   = C->rmap->rend;
7479       const PetscInt *ii   = mm->i;
7480       for (i = 0; i < mr; i++) {
7481         const PetscInt gr = rmap[i];
7482         const PetscInt nz = ii[i + 1] - ii[i];
7483         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7484         else ncoo_oown += nz;                  /* this row is local */
7485       }
7486     } else ncoo_d += mm->nz;
7487   }
7488 
7489   /*
7490     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7491 
7492     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into this proc's rows by other procs.
7493 
7494     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7495 
7496     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7497     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7498     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7499 
7500     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7501     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores the row indices of locally inserted nonzeros, and the remaining part stores the row indices of nonzeros this proc will receive.
7502   */
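  /* Illustrative example (hypothetical values, not computed here): suppose cp = 2, mp[0] contributes 3
     offproc and 2 locally owned nonzeros, and mp[1] contributes 0 offproc and 4 locally owned nonzeros.
     Then ncoo_o = 3, ncoo_oown = 6, and the CSR-like segments satisfy
       off[1] = off[0] + 3,  off[2] = off[1] + 0
       own[1] = own[0] + 2,  own[2] = own[1] + 4
     so off[p+1] - off[p] and own[p+1] - own[p] recover the per-product counts. */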
7503   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7504   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7505 
7506   /* gather (i,j) of nonzeros inserted by remote procs */
7507   if (hasoffproc) {
7508     PetscSF  msf;
7509     PetscInt ncoo2, *coo_i2, *coo_j2;
7510 
7511     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7512     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7513     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7514 
7515     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7516       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7517       PetscInt   *idxoff = mmdata->off[cp];
7518       PetscInt   *idxown = mmdata->own[cp];
7519       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7520         const PetscInt *rmap = rmapa[cp];
7521         const PetscInt *cmap = cmapa[cp];
7522         const PetscInt *ii   = mm->i;
7523         PetscInt       *coi  = coo_i + ncoo_o;
7524         PetscInt       *coj  = coo_j + ncoo_o;
7525         const PetscInt  mr   = mp[cp]->rmap->n;
7526         const PetscInt  rs   = C->rmap->rstart;
7527         const PetscInt  re   = C->rmap->rend;
7528         const PetscInt  cs   = C->cmap->rstart;
7529         for (i = 0; i < mr; i++) {
7530           const PetscInt *jj = mm->j + ii[i];
7531           const PetscInt  gr = rmap[i];
7532           const PetscInt  nz = ii[i + 1] - ii[i];
7533           if (gr < rs || gr >= re) { /* this is an offproc row */
7534             for (j = ii[i]; j < ii[i + 1]; j++) {
7535               *coi++    = gr;
7536               *idxoff++ = j;
7537             }
7538             if (!cmapt[cp]) { /* already global */
7539               for (j = 0; j < nz; j++) *coj++ = jj[j];
7540             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7541               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7542             } else { /* offdiag */
7543               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7544             }
7545             ncoo_o += nz;
7546           } else { /* this is a local row */
7547             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7548           }
7549         }
7550       }
7551       mmdata->off[cp + 1] = idxoff;
7552       mmdata->own[cp + 1] = idxown;
7553     }
7554 
7555     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7556     PetscInt incoo_o;
7557     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7558     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7559     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7560     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7561     ncoo = ncoo_d + ncoo_oown + ncoo2;
7562     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7563     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7564     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7565     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7566     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7567     PetscCall(PetscFree2(coo_i, coo_j));
7568     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7569     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7570     coo_i = coo_i2;
7571     coo_j = coo_j2;
7572   } else { /* no offproc values insertion */
7573     ncoo = ncoo_d;
7574     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7575 
7576     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7577     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7578     PetscCall(PetscSFSetUp(mmdata->sf));
7579   }
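  /* When there is no offproc insertion, the SF above is given an empty graph so that any later scatter
     operations on it become no-ops on this rank; this keeps the two assembly paths uniform. */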
7580   mmdata->hasoffproc = hasoffproc;
7581 
7582   /* gather (i,j) of nonzeros inserted locally */
7583   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7584     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7585     PetscInt       *coi  = coo_i + ncoo_d;
7586     PetscInt       *coj  = coo_j + ncoo_d;
7587     const PetscInt *jj   = mm->j;
7588     const PetscInt *ii   = mm->i;
7589     const PetscInt *cmap = cmapa[cp];
7590     const PetscInt *rmap = rmapa[cp];
7591     const PetscInt  mr   = mp[cp]->rmap->n;
7592     const PetscInt  rs   = C->rmap->rstart;
7593     const PetscInt  re   = C->rmap->rend;
7594     const PetscInt  cs   = C->cmap->rstart;
7595 
7596     if (mptmp[cp]) continue;
7597     if (rmapt[cp] == 1) { /* consecutive rows */
7598       /* fill coo_i */
7599       for (i = 0; i < mr; i++) {
7600         const PetscInt gr = i + rs;
7601         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7602       }
7603       /* fill coo_j */
7604       if (!cmapt[cp]) { /* type-0, already global */
7605         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7606       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7607         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7608       } else {                                            /* type-2, local to global for sparse columns */
7609         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7610       }
7611       ncoo_d += mm->nz;
7612     } else if (rmapt[cp] == 2) { /* sparse rows */
7613       for (i = 0; i < mr; i++) {
7614         const PetscInt *jj = mm->j + ii[i];
7615         const PetscInt  gr = rmap[i];
7616         const PetscInt  nz = ii[i + 1] - ii[i];
7617         if (gr >= rs && gr < re) { /* local rows */
7618           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7619           if (!cmapt[cp]) { /* type-0, already global */
7620             for (j = 0; j < nz; j++) *coj++ = jj[j];
7621           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7622             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7623           } else { /* type-2, local to global for sparse columns */
7624             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7625           }
7626           ncoo_d += nz;
7627         }
7628       }
7629     }
7630   }
7631   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7632   PetscCall(ISDestroy(&glob));
7633   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7634   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7635   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7636   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7637 
7638   /* preallocate with COO data */
7639   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7640   PetscCall(PetscFree2(coo_i, coo_j));
7641   PetscFunctionReturn(PETSC_SUCCESS);
7642 }
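/* Big picture: the symbolic phase above only builds the COO pattern (coo_i, coo_j), the off[]/own[] index
   segments, and the communication SF; the values are produced later by MatProductNumeric_MPIAIJBACKEND
   (installed in C->ops->productnumeric above), which reuses mmdata->sf, mmdata->coo_w and mmdata->coo_v
   allocated here to assemble the numeric COO values into C. */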
7643 
7644 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7645 {
7646   Mat_Product *product = mat->product;
7647 #if defined(PETSC_HAVE_DEVICE)
7648   PetscBool match  = PETSC_FALSE;
7649   PetscBool usecpu = PETSC_FALSE;
7650 #else
7651   PetscBool match = PETSC_TRUE;
7652 #endif
7653 
7654   PetscFunctionBegin;
7655   MatCheckProduct(mat, 1);
7656 #if defined(PETSC_HAVE_DEVICE)
7657   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7658   if (match) { /* we can always fall back to the CPU if requested */
7659     switch (product->type) {
7660     case MATPRODUCT_AB:
7661       if (product->api_user) {
7662         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7663         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7664         PetscOptionsEnd();
7665       } else {
7666         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7667         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7668         PetscOptionsEnd();
7669       }
7670       break;
7671     case MATPRODUCT_AtB:
7672       if (product->api_user) {
7673         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7674         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7675         PetscOptionsEnd();
7676       } else {
7677         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7678         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7679         PetscOptionsEnd();
7680       }
7681       break;
7682     case MATPRODUCT_PtAP:
7683       if (product->api_user) {
7684         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7685         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7686         PetscOptionsEnd();
7687       } else {
7688         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7689         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7690         PetscOptionsEnd();
7691       }
7692       break;
7693     default:
7694       break;
7695     }
7696     match = (PetscBool)!usecpu;
7697   }
7698 #endif
7699   if (match) {
7700     switch (product->type) {
7701     case MATPRODUCT_AB:
7702     case MATPRODUCT_AtB:
7703     case MATPRODUCT_PtAP:
7704       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7705       break;
7706     default:
7707       break;
7708     }
7709   }
7710   /* fallback to MPIAIJ ops */
7711   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7712   PetscFunctionReturn(PETSC_SUCCESS);
7713 }
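/* Usage sketch (option names taken from the handlers above): on device builds the backend symbolic routine
   is selected automatically when A and B have matching, non-CPU-bound types; the CPU fallback can be forced
   per product type, e.g.
     -matmatmult_backend_cpu              (MatMatMult API)
     -mattransposematmult_backend_cpu     (MatTransposeMatMult API)
     -matptap_backend_cpu                 (MatPtAP API)
     -mat_product_algorithm_backend_cpu   (MatProduct interface, any of the three product types) */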
7714 
7715 /*
7716    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7717 
7718    n - the number of block indices in cc[]
7719    cc - the block indices (must be large enough to contain the indices)
7720 */
7721 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7722 {
7723   PetscInt        cnt = -1, nidx, j;
7724   const PetscInt *idx;
7725 
7726   PetscFunctionBegin;
7727   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7728   if (nidx) {
7729     cnt     = 0;
7730     cc[cnt] = idx[0] / bs;
7731     for (j = 1; j < nidx; j++) {
7732       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7733     }
7734   }
7735   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7736   *n = cnt + 1;
7737   PetscFunctionReturn(PETSC_SUCCESS);
7738 }
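/* Example (illustrative only): with bs = 2 and a row whose (sorted) global column indices are
   {0, 1, 4, 5, 9}, MatCollapseRow() returns n = 3 and cc = {0, 2, 4}, i.e. the distinct block columns
   touched by the row. */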
7739 
7740 /*
7741     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7742 
7743     ncollapsed - the number of block indices
7744     collapsed - the block indices (must be large enough to contain the indices)
7745 */
7746 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7747 {
7748   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7749 
7750   PetscFunctionBegin;
7751   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7752   for (i = start + 1; i < start + bs; i++) {
7753     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7754     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7755     cprevtmp = cprev;
7756     cprev    = merged;
7757     merged   = cprevtmp;
7758   }
7759   *ncollapsed = nprev;
7760   if (collapsed) *collapsed = cprev;
7761   PetscFunctionReturn(PETSC_SUCCESS);
7762 }
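/* Example (illustrative only): with bs = 2, if row `start` collapses to {0, 2} and row `start + 1`
   collapses to {2, 3}, then ncollapsed = 3 and collapsed = {0, 2, 3}, assuming PetscMergeIntArray()
   returns the sorted union (duplicates removed) of its two sorted inputs. */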
7763 
7764 /*
7765  MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7766 
7767  Input Parameters:
7768  + Amat - matrix
7769  . symmetrize - make the result symmetric
7770  . scale - scale with the diagonal
      . filter - filter tolerance; nonnegative values cause MatFilter() to be applied to the resulting graph
      . index_size - number of entries in index[], or 0 to use every row/column of each block when forming block values
      - index - indices within each block used to form the block values (ignored when index_size is 0)
7771 
7772  Output Parameter:
7773  . a_Gmat - output scalar graph (all values >= 0)
7774 
7775 */
7776 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7777 {
7778   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7779   MPI_Comm  comm;
7780   Mat       Gmat;
7781   PetscBool ismpiaij, isseqaij;
7782   Mat       a, b, c;
7783   MatType   jtype;
7784 
7785   PetscFunctionBegin;
7786   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7787   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7788   PetscCall(MatGetSize(Amat, &MM, &NN));
7789   PetscCall(MatGetBlockSize(Amat, &bs));
7790   nloc = (Iend - Istart) / bs;
7791 
7792   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7793   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7794   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7795 
7796   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7797   /* One solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class could supply a fast
7798      implementation */
7799   if (bs > 1) {
7800     PetscCall(MatGetType(Amat, &jtype));
7801     PetscCall(MatCreate(comm, &Gmat));
7802     PetscCall(MatSetType(Gmat, jtype));
7803     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7804     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7805     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7806       PetscInt  *d_nnz, *o_nnz;
7807       MatScalar *aa, val, *AA;
7808       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7809 
7810       if (isseqaij) {
7811         a = Amat;
7812         b = NULL;
7813       } else {
7814         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7815         a             = d->A;
7816         b             = d->B;
7817       }
7818       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7819       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7820       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7821         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7822         const PetscInt *cols1, *cols2;
7823 
7824         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7825           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7826           nnz[brow / bs] = nc2 / bs;
7827           if (nc2 % bs) ok = 0;
7828           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7829           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7830             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7831             if (nc1 != nc2) ok = 0;
7832             else {
7833               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7834                 if (cols1[jj] != cols2[jj]) ok = 0;
7835                 if (cols1[jj] % bs != jj % bs) ok = 0;
7836               }
7837             }
7838             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7839           }
7840           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7841           if (!ok) {
7842             PetscCall(PetscFree2(d_nnz, o_nnz));
7843             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7844             goto old_bs;
7845           }
7846         }
7847       }
7848       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7849       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7850       PetscCall(PetscFree2(d_nnz, o_nnz));
7851       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7852       // diag
7853       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7854         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7855 
7856         ai = aseq->i;
7857         n  = ai[brow + 1] - ai[brow];
7858         aj = aseq->j + ai[brow];
7859         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7860           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7861           val        = 0;
7862           if (index_size == 0) {
7863             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7864               aa = aseq->a + ai[brow + ii] + k;
7865               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7866                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7867               }
7868             }
7869           } else {                                            // use (index,index) value if provided
7870             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7871               PetscInt ii = index[iii];
7872               aa          = aseq->a + ai[brow + ii] + k;
7873               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7874                 PetscInt jj = index[jjj];
7875                 val += PetscAbs(PetscRealPart(aa[jj]));
7876               }
7877             }
7878           }
7879           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7880           AA[k / bs] = val;
7881         }
7882         grow = Istart / bs + brow / bs;
7883         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7884       }
7885       // off-diag
7886       if (ismpiaij) {
7887         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7888         const PetscScalar *vals;
7889         const PetscInt    *cols, *garray = aij->garray;
7890 
7891         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7892         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7893           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7894           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7895             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7896             AA[k / bs] = 0;
7897             AJ[cidx]   = garray[cols[k]] / bs;
7898           }
7899           nc = ncols / bs;
7900           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7901           if (index_size == 0) {
7902             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7903               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7904               for (PetscInt k = 0; k < ncols; k += bs) {
7905                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7906                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7907                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7908                 }
7909               }
7910               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7911             }
7912           } else {                                            // use (index,index) value if provided
7913             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7914               PetscInt ii = index[iii];
7915               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7916               for (PetscInt k = 0; k < ncols; k += bs) {
7917                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7918                   PetscInt jj = index[jjj];
7919                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7920                 }
7921               }
7922               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7923             }
7924           }
7925           grow = Istart / bs + brow / bs;
7926           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7927         }
7928       }
7929       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7930       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7931       PetscCall(PetscFree2(AA, AJ));
7932     } else {
7933       const PetscScalar *vals;
7934       const PetscInt    *idx;
7935       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7936     old_bs:
7937       /*
7938        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7939        */
7940       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7941       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7942       if (isseqaij) {
7943         PetscInt max_d_nnz;
7944 
7945         /*
7946          Determine exact preallocation count for (sequential) scalar matrix
7947          */
7948         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7949         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7950         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7951         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7952         PetscCall(PetscFree3(w0, w1, w2));
7953       } else if (ismpiaij) {
7954         Mat             Daij, Oaij;
7955         const PetscInt *garray;
7956         PetscInt        max_d_nnz;
7957 
7958         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7959         /*
7960          Determine exact preallocation count for diagonal block portion of scalar matrix
7961          */
7962         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7963         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7964         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7965         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7966         PetscCall(PetscFree3(w0, w1, w2));
7967         /*
7968          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7969          */
7970         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7971           o_nnz[jj] = 0;
7972           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7973             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7974             o_nnz[jj] += ncols;
7975             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7976           }
7977           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7978         }
7979       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7980       /* get scalar copy (norms) of matrix */
7981       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7982       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7983       PetscCall(PetscFree2(d_nnz, o_nnz));
7984       for (Ii = Istart; Ii < Iend; Ii++) {
7985         PetscInt dest_row = Ii / bs;
7986 
7987         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7988         for (jj = 0; jj < ncols; jj++) {
7989           PetscInt    dest_col = idx[jj] / bs;
7990           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7991 
7992           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7993         }
7994         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7995       }
7996       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7997       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7998     }
7999   } else {
8000     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8001     else {
8002       Gmat = Amat;
8003       PetscCall(PetscObjectReference((PetscObject)Gmat));
8004     }
8005     if (isseqaij) {
8006       a = Gmat;
8007       b = NULL;
8008     } else {
8009       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8010       a             = d->A;
8011       b             = d->B;
8012     }
8013     if (filter >= 0 || scale) {
8014       /* take absolute value of each entry */
8015       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8016         MatInfo      info;
8017         PetscScalar *avals;
8018 
8019         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8020         PetscCall(MatSeqAIJGetArray(c, &avals));
8021         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8022         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8023       }
8024     }
8025   }
8026   if (symmetrize) {
8027     PetscBool isset, issym;
8028 
8029     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8030     if (!isset || !issym) {
8031       Mat matTrans;
8032 
8033       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8034       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8035       PetscCall(MatDestroy(&matTrans));
8036     }
8037     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8038   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8039   if (scale) {
8040     /* scale Gmat so that all diagonal values are +1 or -1 */
8041     Vec diag;
8042 
8043     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8044     PetscCall(MatGetDiagonal(Gmat, diag));
8045     PetscCall(VecReciprocal(diag));
8046     PetscCall(VecSqrtAbs(diag));
8047     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8048     PetscCall(VecDestroy(&diag));
8049   }
8050   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8051   if (filter >= 0) {
8052     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8053     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8054   }
8055   *a_Gmat = Gmat;
8056   PetscFunctionReturn(PETSC_SUCCESS);
8057 }
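/* Usage sketch (hypothetical caller; arguments follow the signature above): a coarsening setup routine
   could build a symmetrized, diagonally scaled scalar graph from a (possibly blocked) AIJ matrix A,
   dropping small entries through MatFilter(), with

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, 0.01, 0, NULL, &G));
     ...
     PetscCall(MatDestroy(&G));

   A negative filter value skips the filtering step, and index_size = 0 makes every row/column of each
   bs x bs block contribute to the scalar block value. */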
8058 
8059 /*
8060     Special version for direct calls from Fortran
8061 */
8062 
8063 /* Change these macros so they can be used in a void function */
8064 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8065 #undef PetscCall
8066 #define PetscCall(...) \
8067   do { \
8068     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8069     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8070       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8071       return; \
8072     } \
8073   } while (0)
8074 
8075 #undef SETERRQ
8076 #define SETERRQ(comm, ierr, ...) \
8077   do { \
8078     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8079     return; \
8080   } while (0)
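/* With these redefinitions, error handling works inside a void Fortran-callable wrapper: on failure the
   error code is stored in the *_ierr output argument (checked on the Fortran side) and the wrapper returns
   immediately instead of propagating a PetscErrorCode return value. */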
8081 
8082 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8083   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8084 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8085   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8086 #else
8087 #endif
8088 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8089 {
8090   Mat         mat = *mmat;
8091   PetscInt    m = *mm, n = *mn;
8092   InsertMode  addv = *maddv;
8093   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8094   PetscScalar value;
8095 
8096   MatCheckPreallocated(mat, 1);
8097   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8098   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8099   {
8100     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8101     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8102     PetscBool roworiented = aij->roworiented;
8103 
8104     /* Some Variables required in the macro */
8105     Mat         A     = aij->A;
8106     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8107     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8108     MatScalar  *aa;
8109     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8110     Mat         B                 = aij->B;
8111     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8112     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8113     MatScalar  *ba;
8114     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8115      * cannot use "#if defined" inside a macro. */
8116     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8117 
8118     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8119     PetscInt   nonew = a->nonew;
8120     MatScalar *ap1, *ap2;
8121 
8122     PetscFunctionBegin;
8123     PetscCall(MatSeqAIJGetArray(A, &aa));
8124     PetscCall(MatSeqAIJGetArray(B, &ba));
8125     for (i = 0; i < m; i++) {
8126       if (im[i] < 0) continue;
8127       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8128       if (im[i] >= rstart && im[i] < rend) {
8129         row      = im[i] - rstart;
8130         lastcol1 = -1;
8131         rp1      = aj + ai[row];
8132         ap1      = aa + ai[row];
8133         rmax1    = aimax[row];
8134         nrow1    = ailen[row];
8135         low1     = 0;
8136         high1    = nrow1;
8137         lastcol2 = -1;
8138         rp2      = bj + bi[row];
8139         ap2      = ba + bi[row];
8140         rmax2    = bimax[row];
8141         nrow2    = bilen[row];
8142         low2     = 0;
8143         high2    = nrow2;
8144 
8145         for (j = 0; j < n; j++) {
8146           if (roworiented) value = v[i * n + j];
8147           else value = v[i + j * m];
8148           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8149           if (in[j] >= cstart && in[j] < cend) {
8150             col = in[j] - cstart;
8151             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8152           } else if (in[j] < 0) continue;
8153           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8154             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8155           } else {
8156             if (mat->was_assembled) {
8157               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8158 #if defined(PETSC_USE_CTABLE)
8159               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8160               col--;
8161 #else
8162               col = aij->colmap[in[j]] - 1;
8163 #endif
8164               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8165                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8166                 col = in[j];
8167                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8168                 B        = aij->B;
8169                 b        = (Mat_SeqAIJ *)B->data;
8170                 bimax    = b->imax;
8171                 bi       = b->i;
8172                 bilen    = b->ilen;
8173                 bj       = b->j;
8174                 rp2      = bj + bi[row];
8175                 ap2      = ba + bi[row];
8176                 rmax2    = bimax[row];
8177                 nrow2    = bilen[row];
8178                 low2     = 0;
8179                 high2    = nrow2;
8180                 bm       = aij->B->rmap->n;
8181                 ba       = b->a;
8182                 inserted = PETSC_FALSE;
8183               }
8184             } else col = in[j];
8185             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8186           }
8187         }
8188       } else if (!aij->donotstash) {
8189         if (roworiented) {
8190           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8191         } else {
8192           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8193         }
8194       }
8195     }
8196     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8197     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8198   }
8199   PetscFunctionReturnVoid();
8200 }
8201 
8202 /* Undefining these here since they were redefined from their original definition above! No
8203  * other PETSc functions should be defined past this point, as it is impossible to recover the
8204  * original definitions */
8205 #undef PetscCall
8206 #undef SETERRQ
8207