xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e0b7e82fd3cf27fce84cc3e37e8d70a5c36a2d4e)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
/* Provide row/column index (IJ) access for an MPIAIJ matrix by first merging the
   diagonal and off-diagonal parts of the locally owned rows into one sequential
   matrix B (via MatMPIAIJGetLocalMat()) and forwarding the request to it.
   B is composed on A under the key "MatGetRowIJ_MPIAIJ" so that
   MatRestoreRowIJ_MPIAIJ() can find the same matrix later. */
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  /* PetscObjectCompose() takes its own reference, so the MatDestroy() below only
     drops the local reference; B stays alive until the composition is cleared */
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and the type also automatically switches over to use inodes when
  enough exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
/* Compute a per-column reduction (1-, 2-, or infinity-norm, or sum/mean of the real or
   imaginary parts) over all rows of A. Each process accumulates contributions from its
   diagonal and off-diagonal blocks into a length-N work array, which is then combined
   across processes with a single MPI reduction.
   Output: reductions[] must have length N (the global number of columns). */
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray; /* garray maps off-diagonal block columns to global columns */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore pairs with unused result: presumably done to force any device-side
     values to be synchronized to the host before a_aij->a/b_aij->a are read directly
     below — TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* accumulate local contributions; diagonal-block columns are shifted by cmap->rstart,
     off-diagonal-block columns are mapped through garray */
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine across processes: max for the infinity norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-process: square root for the 2-norm, division by the global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
324 
325 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
326 {
327   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
328   IS              sis, gis;
329   const PetscInt *isis, *igis;
330   PetscInt        n, *iis, nsis, ngis, rstart, i;
331 
332   PetscFunctionBegin;
333   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
334   PetscCall(MatFindNonzeroRows(a->B, &gis));
335   PetscCall(ISGetSize(gis, &ngis));
336   PetscCall(ISGetSize(sis, &nsis));
337   PetscCall(ISGetIndices(sis, &isis));
338   PetscCall(ISGetIndices(gis, &igis));
339 
340   PetscCall(PetscMalloc1(ngis + nsis, &iis));
341   PetscCall(PetscArraycpy(iis, igis, ngis));
342   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
343   n = ngis + nsis;
344   PetscCall(PetscSortRemoveDupsInt(&n, iis));
345   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
346   for (i = 0; i < n; i++) iis[i] += rstart;
347   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
348 
349   PetscCall(ISRestoreIndices(sis, &isis));
350   PetscCall(ISRestoreIndices(gis, &igis));
351   PetscCall(ISDestroy(&sis));
352   PetscCall(ISDestroy(&gis));
353   PetscFunctionReturn(PETSC_SUCCESS);
354 }
355 
356 /*
357   Local utility routine that creates a mapping from the global column
358 number to the local number in the off-diagonal part of the local
359 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
has an order-N integer array) but is fast to access.
362 */
363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
364 {
365   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
366   PetscInt    n   = aij->B->cmap->n, i;
367 
368   PetscFunctionBegin;
369   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
370 #if defined(PETSC_USE_CTABLE)
371   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
372   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
373 #else
374   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
375   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
376 #endif
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
/* Insert/add one value into the diagonal (A) block of the current row.
   Expects the caller (MatSetValues_MPIAIJ) to have set up: rp1/ap1 (column/value
   arrays of the row), nrow1/rmax1 (used/allocated row length), low1/high1/lastcol1
   (binary-search window, carried between calls to exploit monotone column sequences),
   nonew and ignorezeroentries (from the A block's options), plus the scratch
   variables t, _i, N used below. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    /* reuse the previous search window when columns arrive in nondecreasing order */ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* zero value for a new off-diagonal location: skip when requested (diagonal entries are always kept) */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    /* nonew == 1: silently ignore new nonzero locations */ \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    /* grow the row storage if needed (may reallocate aa/aj, invalidating outside pointers) */ \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
425 
/* Insert/add one value into the off-diagonal (B) block of the current row.
   Mirror image of MatSetValues_SeqAIJ_A_Private() using the rp2/ap2/nrow2/... set of
   caller-provided variables. Note the ignorezeroentries test here has no row != col
   clause — presumably because a true diagonal entry never lands in the off-diagonal
   block; confirm before relying on it. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    /* reuse the previous search window when columns arrive in nondecreasing order */ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    /* nonew == 1: silently ignore new nonzero locations */ \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    /* grow the row storage if needed (may reallocate ba/bj, invalidating outside pointers) */ \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
470 
/* Overwrite all stored values of one locally owned row with v[]. The input v[] is
   ordered by global column, i.e. [B entries left of the diagonal block | A (diagonal
   block) entries | B entries right of the diagonal block], and must supply exactly as
   many values as the row stores. Only the values change; the sparsity pattern is
   untouched. */
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert global row to block-local row */
  /* count the leading off-diagonal entries whose global column is below the diagonal
     block (B columns are sorted, so the first l entries are the "left" segment);
     note this relies on the matrix being square, so diag (= rstart) also marks the
     column range of the diagonal block */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  /* left-of-diagonal segment: first l values of v go to the start of B's row */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
508 
/* MatSetValues() implementation for MPIAIJ. Locally owned rows are routed directly
   into the sequential diagonal (A) or off-diagonal (B) block through the fast
   insertion macros above; rows owned by other processes are buffered in the stash and
   communicated during assembly. Columns that are new to the B block of an already
   assembled matrix force a disassembly back to global column numbering. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  /* scratch used by the MatSetValues_SeqAIJ_{A,B}_Private macros */
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row state both insertion macros need */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column lies in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are ignored by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column numbering; translate via colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new nonzero in B but new nonzeros are disallowed: skip (nonew==1) or error (nonew==-1) */
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B still uses global column numbering */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for communication at assembly time */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
617 
618 /*
619     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
620     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
622 */
623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
624 {
625   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
626   Mat         A      = aij->A; /* diagonal part of the matrix */
627   Mat         B      = aij->B; /* off-diagonal part of the matrix */
628   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
629   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
630   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
631   PetscInt   *ailen = a->ilen, *aj = a->j;
632   PetscInt   *bilen = b->ilen, *bj = b->j;
633   PetscInt    am          = aij->A->rmap->n, j;
634   PetscInt    diag_so_far = 0, dnz;
635   PetscInt    offd_so_far = 0, onz;
636 
637   PetscFunctionBegin;
638   /* Iterate over all rows of the matrix */
639   for (j = 0; j < am; j++) {
640     dnz = onz = 0;
641     /*  Iterate over all non-zero columns of the current row */
642     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
643       /* If column is in the diagonal */
644       if (mat_j[col] >= cstart && mat_j[col] < cend) {
645         aj[diag_so_far++] = mat_j[col] - cstart;
646         dnz++;
647       } else { /* off-diagonal entries */
648         bj[offd_so_far++] = mat_j[col];
649         onz++;
650       }
651     }
652     ailen[j] = dnz;
653     bilen[j] = onz;
654   }
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 /*
659     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
660     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
662     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
663     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
664 */
665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
666 {
667   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
668   Mat          A    = aij->A; /* diagonal part of the matrix */
669   Mat          B    = aij->B; /* off-diagonal part of the matrix */
670   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
671   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
672   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
673   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
674   PetscInt    *ailen = a->ilen, *aj = a->j;
675   PetscInt    *bilen = b->ilen, *bj = b->j;
676   PetscInt     am          = aij->A->rmap->n, j;
677   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
678   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
679   PetscScalar *aa = a->a, *ba = b->a;
680 
681   PetscFunctionBegin;
682   /* Iterate over all rows of the matrix */
683   for (j = 0; j < am; j++) {
684     dnz_row = onz_row = 0;
685     rowstart_offd     = full_offd_i[j];
686     rowstart_diag     = full_diag_i[j];
687     /*  Iterate over all non-zero columns of the current row */
688     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
689       /* If column is in the diagonal */
690       if (mat_j[col] >= cstart && mat_j[col] < cend) {
691         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
692         aa[rowstart_diag + dnz_row] = mat_a[col];
693         dnz_row++;
694       } else { /* off-diagonal entries */
695         bj[rowstart_offd + onz_row] = mat_j[col];
696         ba[rowstart_offd + onz_row] = mat_a[col];
697         onz_row++;
698       }
699     }
700     ailen[j] = dnz_row;
701     bilen[j] = onz_row;
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
705 
706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
707 {
708   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
709   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
710   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
711 
712   PetscFunctionBegin;
713   for (i = 0; i < m; i++) {
714     if (idxm[i] < 0) continue; /* negative row */
715     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
716     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
717     row = idxm[i] - rstart;
718     for (j = 0; j < n; j++) {
719       if (idxn[j] < 0) continue; /* negative column */
720       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
721       if (idxn[j] >= cstart && idxn[j] < cend) {
722         col = idxn[j] - cstart;
723         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
724       } else {
725         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
726 #if defined(PETSC_USE_CTABLE)
727         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
728         col--;
729 #else
730         col = aij->colmap[idxn[j]] - 1;
731 #endif
732         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
733         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
734       }
735     }
736   }
737   PetscFunctionReturn(PETSC_SUCCESS);
738 }
739 
740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
741 {
742   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
743   PetscInt    nstash, reallocs;
744 
745   PetscFunctionBegin;
746   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
747 
748   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
749   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
750   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
751   PetscFunctionReturn(PETSC_SUCCESS);
752 }
753 
/*
   MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash of off-process
   entries into the local blocks, assembles the diagonal (A) and off-diagonal (B)
   sequential blocks, handles global disassembly detection, and updates the
   collective nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries from other ranks until none are left */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    /* MPI_LAND of was_assembled: false on any rank means someone disassembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build the scatter used for matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached row workspace is invalidated by assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is invalidated by assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
832 
833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
834 {
835   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
836 
837   PetscFunctionBegin;
838   PetscCall(MatZeroEntries(l->A));
839   PetscCall(MatZeroEntries(l->B));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
/*
   MatZeroRows_MPIAIJ - Zeroes the (globally indexed) rows, optionally placing
   `diag` on the diagonal and fixing the right-hand side b so that x stays a
   solution for the zeroed rows.
*/
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed: b_i = diag * x_i on each zeroed local row */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the local diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the current nonew flags so they can be restored after insertion */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert `diag` at the (global) diagonal position of each zeroed row,
       skipping rows past the last global column */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the saved nonew flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
913 
/*
   MatZeroRowsColumns_MPIAIJ - Zeroes both the rows and the columns given by
   the global indices `rows`, optionally placing `diag` on the diagonal and
   adjusting b so that x remains a solution on the zeroed rows.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* local row count; later reused as per-row nnz count */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a 0/1 mask over the ghost (off-process) columns: set 1 on the zeroed
     rows, then scatter forward so each rank knows which of its ghost columns
     correspond to zeroed global columns */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* bring in ghost values of x so b can be corrected below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with nonzeros are stored; ridx maps
       back to the actual local row numbers */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i]; /* note: n is reused here as the row nnz count */
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* this column was zeroed: move its contribution to the rhs, then clear it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1031 
1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1033 {
1034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035   PetscInt    nt;
1036   VecScatter  Mvctx = a->Mvctx;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(VecGetLocalSize(xx, &nt));
1040   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1041   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->A, mult, xx, yy);
1043   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1044   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1045   PetscFunctionReturn(PETSC_SUCCESS);
1046 }
1047 
1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1049 {
1050   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1051 
1052   PetscFunctionBegin;
1053   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1058 {
1059   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1060   VecScatter  Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   /* do nondiagonal part */
1076   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1077   /* do local part */
1078   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1079   /* add partial results together */
1080   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1081   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
/*
   MatIsTranspose_MPIAIJ - Tests whether Bmat equals the transpose of Amat to
   tolerance tol. First does a cheap collective test on the diagonal blocks,
   then compares the off-diagonal parts via submatrix extraction.
*/
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  /* all ranks must agree; bail out early if any diagonal block fails */
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); /* sequential: diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global indices outside this rank's ownership range.
     NOTE(review): the buffer is sized with N but the second fill loop runs to M;
     this presumes M == N (square matrix) — confirm callers guarantee this. */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* extract A(Me, Notme) and B(Notme, Me) and compare them as transposes */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1124 
1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1126 {
1127   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1128 
1129   PetscFunctionBegin;
1130   /* do nondiagonal part */
1131   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1132   /* do local part */
1133   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1134   /* add partial results together */
1135   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1136   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1137   PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139 
1140 /*
1141   This only works correctly for square matrices where the subblock A->A is the
1142    diagonal block
1143 */
1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1150   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1151   PetscCall(MatGetDiagonal(a->A, v));
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(MatScale(a->A, aa));
1161   PetscCall(MatScale(a->B, aa));
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1166 {
1167   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1168   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1169   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1170   const PetscInt    *garray = aij->garray;
1171   const PetscScalar *aa, *ba;
1172   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1173   PetscInt64         nz, hnz;
1174   PetscInt          *rowlens;
1175   PetscInt          *colidxs;
1176   PetscScalar       *matvals;
1177   PetscMPIInt        rank;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscViewerSetUp(viewer));
1181 
1182   M  = mat->rmap->N;
1183   N  = mat->cmap->N;
1184   m  = mat->rmap->n;
1185   rs = mat->rmap->rstart;
1186   cs = mat->cmap->rstart;
1187   nz = A->nz + B->nz;
1188 
1189   /* write matrix header */
1190   header[0] = MAT_FILE_CLASSID;
1191   header[1] = M;
1192   header[2] = N;
1193   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1194   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1195   if (rank == 0) {
1196     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1197     else header[3] = (PetscInt)hnz;
1198   }
1199   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1200 
1201   /* fill in and store row lengths  */
1202   PetscCall(PetscMalloc1(m, &rowlens));
1203   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1204   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1205   PetscCall(PetscFree(rowlens));
1206 
1207   /* fill in and store column indices */
1208   PetscCall(PetscMalloc1(nz, &colidxs));
1209   for (cnt = 0, i = 0; i < m; i++) {
1210     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1211       if (garray[B->j[jb]] > cs) break;
1212       colidxs[cnt++] = garray[B->j[jb]];
1213     }
1214     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1215     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1216   }
1217   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1218   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1219   PetscCall(PetscFree(colidxs));
1220 
1221   /* fill in and store nonzero values */
1222   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1223   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1224   PetscCall(PetscMalloc1(nz, &matvals));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       matvals[cnt++] = ba[jb];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1231     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1232   }
1233   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1235   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1236   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1237   PetscCall(PetscFree(matvals));
1238 
1239   /* write block size option to the viewer's .info file */
1240   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 #include <petscdraw.h>
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Handles the viewer kinds dispatched by
   MatView_MPIAIJ: special ASCII formats and binary output are served directly;
   anything else falls through to gathering the whole matrix on rank 0 and
   viewing it there.
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary of local storage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch appears unreachable — any iascii viewer was
       already handled by the first branch of this if/else chain; confirm intent */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/cols; other ranks request empty strides */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1369 
1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1371 {
1372   PetscBool iascii, isdraw, issocket, isbinary;
1373 
1374   PetscFunctionBegin;
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1377   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1378   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1379   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1380   PetscFunctionReturn(PETSC_SUCCESS);
1381 }
1382 
/* SOR/Gauss-Seidel relaxation for MPIAIJ matrices.  Only the "local" sweep
   variants (and Eisenstat) are supported: each process relaxes with its
   diagonal block mat->A, while coupling to off-process unknowns enters through
   the modified right-hand side bb1 = bb - B*x, where B is the off-diagonal
   block and x the latest iterate ghosted via the scatter mat->Mvctx.
   A truly parallel (global ordering) SOR is not available and raises an error. */
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector for bb - B*x; allocated only when some sweep starts from a nonzero x */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* applying the upper-triangular part is a purely local operation */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed unless every iteration uses a zero initial guess (single its, non-Eisenstat) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep: x = 0, so no off-process correction of the rhs is needed */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* refresh the ghost values of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward local sweep from a zero initial guess produces the first half-iterate in xx */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily create and cache the diagonal; reused on subsequent calls */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      /* fall back to pointwise multiplication by the cached diagonal */
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * bb1  (Eisenstat rhs modification) */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate zero-pivot-style failures detected in the local relaxation */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1479 
/* Permute rows and columns of a parallel AIJ matrix: *B = P_r * A * P_c.
   The row/column permutations arrive as the desired new global ordering
   (rowp/colp); star forests (PetscSF) are used to invert them and to discover
   where each local row, local column, and ghost column lands, after which the
   permuted matrix is preallocated exactly and filled with MatSetValues(). */
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  /* reduce my global row numbers onto the owners of the permuted positions */
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  /* pull the permuted destination of each ghost column from its owner */
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros of each permuted row for exact preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the per-row counts to the processes that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never set in this path, so this destroy is currently dead code */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1585 
1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1587 {
1588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1589 
1590   PetscFunctionBegin;
1591   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1592   if (ghosts) *ghosts = aij->garray;
1593   PetscFunctionReturn(PETSC_SUCCESS);
1594 }
1595 
1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1597 {
1598   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1599   Mat            A = mat->A, B = mat->B;
1600   PetscLogDouble isend[5], irecv[5];
1601 
1602   PetscFunctionBegin;
1603   info->block_size = 1.0;
1604   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1605 
1606   isend[0] = info->nz_used;
1607   isend[1] = info->nz_allocated;
1608   isend[2] = info->nz_unneeded;
1609   isend[3] = info->memory;
1610   isend[4] = info->mallocs;
1611 
1612   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1613 
1614   isend[0] += info->nz_used;
1615   isend[1] += info->nz_allocated;
1616   isend[2] += info->nz_unneeded;
1617   isend[3] += info->memory;
1618   isend[4] += info->mallocs;
1619   if (flag == MAT_LOCAL) {
1620     info->nz_used      = isend[0];
1621     info->nz_allocated = isend[1];
1622     info->nz_unneeded  = isend[2];
1623     info->memory       = isend[3];
1624     info->mallocs      = isend[4];
1625   } else if (flag == MAT_GLOBAL_MAX) {
1626     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1627 
1628     info->nz_used      = irecv[0];
1629     info->nz_allocated = irecv[1];
1630     info->nz_unneeded  = irecv[2];
1631     info->memory       = irecv[3];
1632     info->mallocs      = irecv[4];
1633   } else if (flag == MAT_GLOBAL_SUM) {
1634     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1635 
1636     info->nz_used      = irecv[0];
1637     info->nz_allocated = irecv[1];
1638     info->nz_unneeded  = irecv[2];
1639     info->memory       = irecv[3];
1640     info->mallocs      = irecv[4];
1641   }
1642   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1643   info->fill_ratio_needed = 0;
1644   info->factor_mallocs    = 0;
1645   PetscFunctionReturn(PETSC_SUCCESS);
1646 }
1647 
/* Set a matrix option.  Most options are forwarded to both sequential blocks
   (diagonal A and off-diagonal B); a few are stored in the MPIAIJ context or
   the Mat header, and some are deliberately ignored for this type. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that only make sense once the blocks exist: forward to A and B */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* remember the orientation for MatSetValues on this level, too */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, MatSetValues drops entries destined for other processes instead of stashing them */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1701 
/* Return one (globally numbered, locally owned) row of the matrix.  The entries
   of the diagonal block A and the off-diagonal block B are merged into the
   persistent work arrays mat->rowvalues/mat->rowindices in increasing global
   column order.  Must be paired with MatRestoreRow_MPIAIJ(); only one row may
   be outstanding at a time. */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* request from the blocks only the outputs the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* local-to-global map for B's compressed columns */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  /* hand the per-block work arrays back immediately; the merged copies persist */
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1785 
1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1787 {
1788   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1789 
1790   PetscFunctionBegin;
1791   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1792   aij->getrowactive = PETSC_FALSE;
1793   PetscFunctionReturn(PETSC_SUCCESS);
1794 }
1795 
/* Compute a matrix norm.  On one process this delegates to the sequential
   implementation; otherwise the contributions of the diagonal (A) and
   off-diagonal (B) blocks are combined and reduced across the communicator.
   NORM_2 is not supported. */
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, reduce, then take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column, then reduce and take the maximum;
         note the O(N) temporary sized by the GLOBAL number of columns */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        /* A's columns are local: shift by the owned-column offset */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* B's columns are compressed: map through garray to global numbering */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are wholly local, so a local max followed by a global max suffices */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1873 
/* Transpose a parallel AIJ matrix.  For MAT_INITIAL_MATRIX (or in-place) the
   result is preallocated exactly: the diagonal-block column counts give d_nnz,
   while the off-diagonal counts are routed to the owning processes with a
   PetscSF.  The diagonal block is then transposed locally (fast path) and the
   off-diagonal entries are inserted with MatSetValues(), which communicates
   them to their new owners. */
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* the transpose has the transposed sizes and swapped block sizes */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* expand B's compressed column indices to global numbering, then insert each
     row of B as a column of the result (note the swapped 1/ncol arguments) */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's innards with B's and discard the shell */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1967 
1968 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1969 {
1970   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1971   Mat         a = aij->A, b = aij->B;
1972   PetscInt    s1, s2, s3;
1973 
1974   PetscFunctionBegin;
1975   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1976   if (rr) {
1977     PetscCall(VecGetLocalSize(rr, &s1));
1978     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1979     /* Overlap communication with computation. */
1980     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1981   }
1982   if (ll) {
1983     PetscCall(VecGetLocalSize(ll, &s1));
1984     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1985     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1986   }
1987   /* scale  the diagonal block */
1988   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1989 
1990   if (rr) {
1991     /* Do a scatter end and then right scale the off-diagonal block */
1992     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1993     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1994   }
1995   PetscFunctionReturn(PETSC_SUCCESS);
1996 }
1997 
1998 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1999 {
2000   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2001 
2002   PetscFunctionBegin;
2003   PetscCall(MatSetUnfactored(a->A));
2004   PetscFunctionReturn(PETSC_SUCCESS);
2005 }
2006 
2007 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2008 {
2009   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2010   Mat         a, b, c, d;
2011   PetscBool   flg;
2012 
2013   PetscFunctionBegin;
2014   a = matA->A;
2015   b = matA->B;
2016   c = matB->A;
2017   d = matB->B;
2018 
2019   PetscCall(MatEqual(a, c, &flg));
2020   if (flg) PetscCall(MatEqual(b, d, &flg));
2021   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2022   PetscFunctionReturn(PETSC_SUCCESS);
2023 }
2024 
2025 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2026 {
2027   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2028   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2029 
2030   PetscFunctionBegin;
2031   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2032   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2033     /* because of the column compression in the off-processor part of the matrix a->B,
2034        the number of columns in a->B and b->B may be different, hence we cannot call
2035        the MatCopy() directly on the two parts. If need be, we can provide a more
2036        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2037        then copying the submatrices */
2038     PetscCall(MatCopy_Basic(A, B, str));
2039   } else {
2040     PetscCall(MatCopy(a->A, b->A, str));
2041     PetscCall(MatCopy(a->B, b->B, str));
2042   }
2043   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2044   PetscFunctionReturn(PETSC_SUCCESS);
2045 }
2046 
2047 /*
2048    Computes the number of nonzeros per row needed for preallocation when X and Y
2049    have different nonzero structure.
2050 */
2051 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2052 {
2053   PetscInt i, j, k, nzx, nzy;
2054 
2055   PetscFunctionBegin;
2056   /* Set the number of nonzeros in the new matrix */
2057   for (i = 0; i < m; i++) {
2058     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2059     nzx    = xi[i + 1] - xi[i];
2060     nzy    = yi[i + 1] - yi[i];
2061     nnz[i] = 0;
2062     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2063       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2064       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2065       nnz[i]++;
2066     }
2067     for (; k < nzy; k++) nnz[i]++;
2068   }
2069   PetscFunctionReturn(PETSC_SUCCESS);
2070 }
2071 
2072 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2073 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2074 {
2075   PetscInt    m = Y->rmap->N;
2076   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2077   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2078 
2079   PetscFunctionBegin;
2080   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2081   PetscFunctionReturn(PETSC_SUCCESS);
2082 }
2083 
2084 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2085 {
2086   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2087 
2088   PetscFunctionBegin;
2089   if (str == SAME_NONZERO_PATTERN) {
2090     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2091     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2092   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2093     PetscCall(MatAXPY_Basic(Y, a, X, str));
2094   } else {
2095     Mat       B;
2096     PetscInt *nnz_d, *nnz_o;
2097 
2098     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2099     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2100     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2101     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2102     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2103     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2104     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2105     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2106     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2107     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2108     PetscCall(MatHeaderMerge(Y, &B));
2109     PetscCall(PetscFree(nnz_d));
2110     PetscCall(PetscFree(nnz_o));
2111   }
2112   PetscFunctionReturn(PETSC_SUCCESS);
2113 }
2114 
2115 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2116 
2117 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2118 {
2119   PetscFunctionBegin;
2120   if (PetscDefined(USE_COMPLEX)) {
2121     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2122 
2123     PetscCall(MatConjugate_SeqAIJ(aij->A));
2124     PetscCall(MatConjugate_SeqAIJ(aij->B));
2125   }
2126   PetscFunctionReturn(PETSC_SUCCESS);
2127 }
2128 
2129 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2130 {
2131   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2132 
2133   PetscFunctionBegin;
2134   PetscCall(MatRealPart(a->A));
2135   PetscCall(MatRealPart(a->B));
2136   PetscFunctionReturn(PETSC_SUCCESS);
2137 }
2138 
2139 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2140 {
2141   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2142 
2143   PetscFunctionBegin;
2144   PetscCall(MatImaginaryPart(a->A));
2145   PetscCall(MatImaginaryPart(a->B));
2146   PetscFunctionReturn(PETSC_SUCCESS);
2147 }
2148 
2149 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2150 {
2151   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2152   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2153   PetscScalar       *va, *vv;
2154   Vec                vB, vA;
2155   const PetscScalar *vb;
2156 
2157   PetscFunctionBegin;
2158   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2159   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2160 
2161   PetscCall(VecGetArrayWrite(vA, &va));
2162   if (idx) {
2163     for (i = 0; i < m; i++) {
2164       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2165     }
2166   }
2167 
2168   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2169   PetscCall(PetscMalloc1(m, &idxb));
2170   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2171 
2172   PetscCall(VecGetArrayWrite(v, &vv));
2173   PetscCall(VecGetArrayRead(vB, &vb));
2174   for (i = 0; i < m; i++) {
2175     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2176       vv[i] = vb[i];
2177       if (idx) idx[i] = a->garray[idxb[i]];
2178     } else {
2179       vv[i] = va[i];
2180       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2181     }
2182   }
2183   PetscCall(VecRestoreArrayWrite(vA, &vv));
2184   PetscCall(VecRestoreArrayWrite(vA, &va));
2185   PetscCall(VecRestoreArrayRead(vB, &vb));
2186   PetscCall(PetscFree(idxb));
2187   PetscCall(VecDestroy(&vA));
2188   PetscCall(VecDestroy(&vB));
2189   PetscFunctionReturn(PETSC_SUCCESS);
2190 }
2191 
2192 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2193 {
2194   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2195   Vec         vB, vA;
2196 
2197   PetscFunctionBegin;
2198   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2199   PetscCall(MatGetRowSumAbs(a->A, vA));
2200   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2201   PetscCall(MatGetRowSumAbs(a->B, vB));
2202   PetscCall(VecAXPY(vA, 1.0, vB));
2203   PetscCall(VecDestroy(&vB));
2204   PetscCall(VecCopy(vA, v));
2205   PetscCall(VecDestroy(&vA));
2206   PetscFunctionReturn(PETSC_SUCCESS);
2207 }
2208 
/* v[r] = min_j |A(r,j)| over the entire global row, counting the implicit zeros
   of the off-diagonal block; if idx is non-NULL, idx[r] receives the global
   column of the minimizing entry (ties broken toward the smaller column). */
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;                 /* local rows / local (diagonal-block) columns */
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend; /* global column range of the diagonal block */
  PetscInt          *cmap = mat->garray;                             /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything lives in the diagonal block: delegate, writing directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: its rows consist solely of implicit zeros */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1; /* no explicit entry to report */
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zero, start from the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the row has an implicit 0.0 whose magnitude no entry can beat */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap, i.e. the global column of the first implicit 0.0
         NOTE(review): the comparisons of the loop index j against cstart look fragile -- confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal block's column range */
        }
      }
    }

    /* Scan the stored entries of this B row for a smaller magnitude */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the per-row results of the diagonal and off-diagonal blocks */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* local diagonal-block column -> global */
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) { /* tie: report the smaller global column */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2324 
/* v[r] = min_j A(r,j) (real-part comparison) over the entire global row, counting
   the implicit zeros of the off-diagonal block; if idx is non-NULL, idx[r] receives
   the global column of the minimizing entry (ties broken toward the smaller column). */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;                 /* local rows / local (diagonal-block) columns */
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend; /* global column range of the diagonal block */
  PetscInt          *cmap = mat->garray;                             /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything lives in the diagonal block: delegate, writing directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: there is no entry, so report the identity of min */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1; /* no explicit entry to report */
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zero, start from the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the row has an implicit 0.0 and the minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap, i.e. the global column of the first implicit 0.0
         NOTE(review): the comparisons of the loop index j against cstart look fragile -- confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal block's column range */
        }
      }
    }

    /* Scan the stored entries of this B row for a smaller (real-part) value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the per-row results of the diagonal and off-diagonal blocks */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* local diagonal-block column -> global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) { /* tie: report the smaller global column */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2440 
/* v[r] = max_j A(r,j) (real-part comparison) over the entire global row, counting
   the implicit zeros of the off-diagonal block; if idx is non-NULL, idx[r] receives
   the global column of the maximizing entry (ties broken toward the smaller column). */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;                 /* local rows / local (diagonal-block) columns */
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend; /* global column range of the diagonal block */
  PetscInt          *cmap = mat->garray;                             /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything lives in the diagonal block: delegate, writing directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: there is no entry, so report the identity of max */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1; /* no explicit entry to report */
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zero, start from the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap, i.e. the global column of the first implicit 0.0
         NOTE(review): the comparisons of the loop index j against cstart look fragile -- confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal block's column range */
        }
      }
    }

    /* Scan the stored entries of this B row for a larger (real-part) value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the per-row results of the diagonal and off-diagonal blocks */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* local diagonal-block column -> global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) { /* tie: report the smaller global column */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2556 
2557 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2558 {
2559   Mat *dummy;
2560 
2561   PetscFunctionBegin;
2562   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2563   *newmat = *dummy;
2564   PetscCall(PetscFree(dummy));
2565   PetscFunctionReturn(PETSC_SUCCESS);
2566 }
2567 
2568 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2569 {
2570   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2571 
2572   PetscFunctionBegin;
2573   PetscCall(MatInvertBlockDiagonal(a->A, values));
2574   A->factorerrortype = a->A->factorerrortype;
2575   PetscFunctionReturn(PETSC_SUCCESS);
2576 }
2577 
2578 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2579 {
2580   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2581 
2582   PetscFunctionBegin;
2583   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2584   PetscCall(MatSetRandom(aij->A, rctx));
2585   if (x->assembled) {
2586     PetscCall(MatSetRandom(aij->B, rctx));
2587   } else {
2588     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2589   }
2590   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2591   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2592   PetscFunctionReturn(PETSC_SUCCESS);
2593 }
2594 
2595 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2596 {
2597   PetscFunctionBegin;
2598   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2599   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2600   PetscFunctionReturn(PETSC_SUCCESS);
2601 }
2602 
2603 /*@
2604   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2605 
2606   Not Collective
2607 
2608   Input Parameter:
2609 . A - the matrix
2610 
2611   Output Parameter:
2612 . nz - the number of nonzeros
2613 
2614   Level: advanced
2615 
2616 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2617 @*/
2618 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2619 {
2620   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2621   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2622   PetscBool   isaij;
2623 
2624   PetscFunctionBegin;
2625   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2626   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2627   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2628   PetscFunctionReturn(PETSC_SUCCESS);
2629 }
2630 
2631 /*@
2632   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2633 
2634   Collective
2635 
2636   Input Parameters:
2637 + A  - the matrix
2638 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2639 
2640   Level: advanced
2641 
2642 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2643 @*/
2644 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2645 {
2646   PetscFunctionBegin;
2647   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2648   PetscFunctionReturn(PETSC_SUCCESS);
2649 }
2650 
2651 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2652 {
2653   PetscBool sc = PETSC_FALSE, flg;
2654 
2655   PetscFunctionBegin;
2656   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2657   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2658   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2659   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2660   PetscOptionsHeadEnd();
2661   PetscFunctionReturn(PETSC_SUCCESS);
2662 }
2663 
/* Y = Y + a*I, making sure there is room for the diagonal entries first */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* Reserve one diagonal-block entry per row for the diagonal about to be inserted */
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    /* Save and restore the no-new-nonzeros flag around preallocation — presumably
       MatSeqAIJSetPreallocation() resets it; confirm against its implementation */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2680 
2681 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2682 {
2683   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2684 
2685   PetscFunctionBegin;
2686   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2687   PetscCall(MatMissingDiagonal(a->A, missing, d));
2688   if (d) {
2689     PetscInt rstart;
2690     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2691     *d += rstart;
2692   }
2693   PetscFunctionReturn(PETSC_SUCCESS);
2694 }
2695 
2696 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2697 {
2698   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2699 
2700   PetscFunctionBegin;
2701   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2702   PetscFunctionReturn(PETSC_SUCCESS);
2703 }
2704 
2705 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2706 {
2707   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2708 
2709   PetscFunctionBegin;
2710   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2711   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2712   PetscFunctionReturn(PETSC_SUCCESS);
2713 }
2714 
/* Function dispatch table for MATMPIAIJ. The slot numbers in the comments are the
   positional indices into struct _MatOps; NULL slots either fall back to a generic
   implementation or are unsupported for this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ};
2868 
2869 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2870 {
2871   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2872 
2873   PetscFunctionBegin;
2874   PetscCall(MatStoreValues(aij->A));
2875   PetscCall(MatStoreValues(aij->B));
2876   PetscFunctionReturn(PETSC_SUCCESS);
2877 }
2878 
2879 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2880 {
2881   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2882 
2883   PetscFunctionBegin;
2884   PetscCall(MatRetrieveValues(aij->A));
2885   PetscCall(MatRetrieveValues(aij->B));
2886   PetscFunctionReturn(PETSC_SUCCESS);
2887 }
2888 
2889 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2890 {
2891   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2892   PetscMPIInt size;
2893 
2894   PetscFunctionBegin;
2895   if (B->hash_active) {
2896     B->ops[0]      = b->cops;
2897     B->hash_active = PETSC_FALSE;
2898   }
2899   PetscCall(PetscLayoutSetUp(B->rmap));
2900   PetscCall(PetscLayoutSetUp(B->cmap));
2901 
2902 #if defined(PETSC_USE_CTABLE)
2903   PetscCall(PetscHMapIDestroy(&b->colmap));
2904 #else
2905   PetscCall(PetscFree(b->colmap));
2906 #endif
2907   PetscCall(PetscFree(b->garray));
2908   PetscCall(VecDestroy(&b->lvec));
2909   PetscCall(VecScatterDestroy(&b->Mvctx));
2910 
2911   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2912 
2913   MatSeqXAIJGetOptions_Private(b->B);
2914   PetscCall(MatDestroy(&b->B));
2915   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2916   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2917   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2918   PetscCall(MatSetType(b->B, MATSEQAIJ));
2919   MatSeqXAIJRestoreOptions_Private(b->B);
2920 
2921   MatSeqXAIJGetOptions_Private(b->A);
2922   PetscCall(MatDestroy(&b->A));
2923   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2924   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2925   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2926   PetscCall(MatSetType(b->A, MATSEQAIJ));
2927   MatSeqXAIJRestoreOptions_Private(b->A);
2928 
2929   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2930   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2931   B->preallocated  = PETSC_TRUE;
2932   B->was_assembled = PETSC_FALSE;
2933   B->assembled     = PETSC_FALSE;
2934   PetscFunctionReturn(PETSC_SUCCESS);
2935 }
2936 
2937 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2938 {
2939   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2940 
2941   PetscFunctionBegin;
2942   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2943   PetscCall(PetscLayoutSetUp(B->rmap));
2944   PetscCall(PetscLayoutSetUp(B->cmap));
2945 
2946 #if defined(PETSC_USE_CTABLE)
2947   PetscCall(PetscHMapIDestroy(&b->colmap));
2948 #else
2949   PetscCall(PetscFree(b->colmap));
2950 #endif
2951   PetscCall(PetscFree(b->garray));
2952   PetscCall(VecDestroy(&b->lvec));
2953   PetscCall(VecScatterDestroy(&b->Mvctx));
2954 
2955   PetscCall(MatResetPreallocation(b->A));
2956   PetscCall(MatResetPreallocation(b->B));
2957   B->preallocated  = PETSC_TRUE;
2958   B->was_assembled = PETSC_FALSE;
2959   B->assembled     = PETSC_FALSE;
2960   PetscFunctionReturn(PETSC_SUCCESS);
2961 }
2962 
/* Duplicate an MPIAIJ matrix: creates a new matrix of the same type/layout and
   copies (or omits, per cpvalues) the numerical values by duplicating the two
   sequential local blocks. Layouts are shared by reference, not copied. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  /* Build the shell of the duplicate with identical sizes, block sizes, and type */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size        = oldmat->size;
  a->rank        = oldmat->rank;
  a->donotstash  = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  /* Per-call MatGetRow work arrays are not copied; they are rebuilt on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* Share the row/column layouts by reference (replaces the ones set above) */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* Input is still in hash-based insertion mode: just set the duplicate up the same way */
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    /* Duplicate the global-to-local column map used for off-diagonal insertion, if built */
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    /* Copy the compact list of global column indices of the off-diagonal block */
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    /* Deep-copy the sequential diagonal (A) and off-diagonal (B) blocks */
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  /* Carry over functions composed on the original object */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3021 
3022 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3023 {
3024   PetscBool isbinary, ishdf5;
3025 
3026   PetscFunctionBegin;
3027   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3028   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3029   /* force binary viewer to load .info file if it has not yet done so */
3030   PetscCall(PetscViewerSetUp(viewer));
3031   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3032   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3033   if (isbinary) {
3034     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3035   } else if (ishdf5) {
3036 #if defined(PETSC_HAVE_HDF5)
3037     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3038 #else
3039     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3040 #endif
3041   } else {
3042     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3043   }
3044   PetscFunctionReturn(PETSC_SUCCESS);
3045 }
3046 
/* Load an MPIAIJ matrix from a PETSc binary viewer. The file stores a header
   (classid, M, N, nz), then all row lengths, then all column indices, then all
   values; each rank reads its share of rows collectively. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* negative nz marks a special on-disk format this loader does not handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  /* collective read: each rank receives the lengths of its own m rows */
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum converts per-row lengths into CSR row offsets; rowidxs[m] = local nnz */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* nz == PETSC_MAX_INT presumably marks an unknown/overflowed global count — skip the consistency check then (TODO confirm) */
  if (nz != PETSC_MAX_INT) {
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3099 
3100 /* Not scalable because of ISAllGather() unless getting all columns. */
3101 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3102 {
3103   IS          iscol_local;
3104   PetscBool   isstride;
3105   PetscMPIInt lisstride = 0, gisstride;
3106 
3107   PetscFunctionBegin;
3108   /* check if we are grabbing all columns*/
3109   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3110 
3111   if (isstride) {
3112     PetscInt start, len, mstart, mlen;
3113     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3114     PetscCall(ISGetLocalSize(iscol, &len));
3115     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3116     if (mstart == start && mlen - mstart == len) lisstride = 1;
3117   }
3118 
3119   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3120   if (gisstride) {
3121     PetscInt N;
3122     PetscCall(MatGetSize(mat, NULL, &N));
3123     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3124     PetscCall(ISSetIdentity(iscol_local));
3125     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3126   } else {
3127     PetscInt cbs;
3128     PetscCall(ISGetBlockSize(iscol, &cbs));
3129     PetscCall(ISAllGather(iscol, &iscol_local));
3130     PetscCall(ISSetBlockSize(iscol_local, cbs));
3131   }
3132 
3133   *isseq = iscol_local;
3134   PetscFunctionReturn(PETSC_SUCCESS);
3135 }
3136 
3137 /*
3138  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3139  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3140 
3141  Input Parameters:
3142 +   mat - matrix
3143 .   isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
3144            i.e., mat->rstart <= isrow[i] < mat->rend
3145 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3146            i.e., mat->cstart <= iscol[i] < mat->cend
3147 
3148  Output Parameters:
3149 +   isrow_d - sequential row index set for retrieving mat->A
3150 .   iscol_d - sequential  column index set for retrieving mat->A
3151 .   iscol_o - sequential column index set for retrieving mat->B
3152 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3153  */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of the local column counts */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  /* Mark the selected columns in x, record their submatrix column indices in cmap,
     and collect their local (diagonal-block) indices in idx */
  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d; the IS takes ownership of idx (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i)); /* i is reused here to hold the block size */
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: shift the (locally owned) row indices to local numbering */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries of lvec that are > -1 were selected by some rank */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* ownership of cmap1 passes to the caller via *garray */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3250 
3251 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/* Extract a parallel submatrix when both isrow and iscol follow the same process
   distribution as mat. Builds the result from sequential submatrices of the local
   diagonal (A) and off-diagonal (B) blocks; the index sets needed for fast reuse
   are stashed on the submatrix via PetscObjectCompose(). */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; this consumes Asub and Bsub (Bsub is destroyed inside) */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* Walk both sorted column lists, keeping only the iscol_o entries whose
         global column survived the assembly-time compression of M's B block */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3343 
/* Top-level submatrix extraction for MPIAIJ. Chooses among three strategies:
   (a) isrow and iscol both match mat's process distribution (fast, scalable),
   (b) only isrow matches (semi-scalable), or
   (c) the general non-scalable path that gathers iscol on every rank.
   The decision is made collectively so all ranks take the same branch. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* On reuse, recover which strategy produced *newmat from the objects composed on it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* All local row indices must fall inside this rank's ownership range */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* MPI_LAND: the fast paths are valid only if every rank qualifies */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* not sorted: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* Stash the gathered IS on the new matrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3443 
3444 /*@C
3445   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3446   and "off-diagonal" part of the matrix in CSR format.
3447 
3448   Collective
3449 
3450   Input Parameters:
3451 + comm   - MPI communicator
3452 . A      - "diagonal" portion of matrix
3453 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3454 - garray - global index of `B` columns
3455 
3456   Output Parameter:
3457 . mat - the matrix, with input `A` as its local diagonal matrix
3458 
3459   Level: advanced
3460 
3461   Notes:
3462   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3463 
3464   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3465 
3466 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3467 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* Translate B's local column indices to global numbering, in place, using garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; it wraps B's existing i/j/a arrays
     rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Transfer ownership of the shared arrays from B to Bnew: clear B's free flags
     before destroying it so the arrays survive, then set them on Bnew */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3538 
3539 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3540 
/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - Extracts the parallel submatrix mat[isrow, iscol],
  with the row distribution of the result taken from isrow on each process.

  On MAT_INITIAL_MATRIX: builds a condensed local column IS (iscol_sub) together with a map
  (iscmap) from condensed column numbers to submatrix column numbers, extracts a sequential
  submatrix Msub on each process via MatCreateSubMatrices_MPIAIJ_SingleIS_Local(), then
  preallocates and assembles the parallel result. Msub, iscol_sub and iscmap are composed
  with the new matrix so a later MAT_REUSE_MATRIX call can skip the symbolic phase.

  iscol_local is iscol gathered to each process (a sequential IS with all requested columns);
  the condensing pass below requires it to be sorted, duplicates are allowed.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the objects composed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    /* numeric-only re-extraction into the saved sequential submatrix */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* all ranks must agree on the all-columns shortcut, hence the logical-AND reduction */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      /* no condensing needed: the column map is the identity */
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* keep a requested column only if it appears in this process's diagonal column
         range [cstart,cend) or among the off-diagonal columns listed in garray;
         the merge below relies on both is_idx and garray being sorted */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      /* ISCreateGeneral with PETSC_OWN_POINTER takes over idx/cmap1; do not free them here */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's diagonal column range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* one allocation serves both length arrays */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* cmap translates Msub's local column numbers into submatrix global columns */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* remap columns to submatrix numbering */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  /* NOTE(review): aa has been advanced past the last row here; confirm
     MatSeqAIJRestoreArrayRead() tolerates a pointer offset from the one Get returned */
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    /* composing takes a reference, so dropping ours keeps each object alive on *newmat */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3750 
/*
    Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
  submatrix on each process, and then the final result by concatenating those local
  matrices. Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* all ranks must agree on the all-columns shortcut, hence the logical-AND reduction */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix composed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's diagonal column range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* one allocation serves both length arrays */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* insert Mreuse row by row; cwork/vwork walk its CSR column/value arrays */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  /* NOTE(review): aa has been advanced past the last row here; confirm
     MatSeqAIJRestoreArrayRead() tolerates a pointer offset from the one Get returned */
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    /* composing takes a reference; dropping ours keeps Mreuse alive on M */
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3884 
3885 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3886 {
3887   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3888   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3889   const PetscInt *JJ;
3890   PetscBool       nooffprocentries;
3891   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3892 
3893   PetscFunctionBegin;
3894   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3895 
3896   PetscCall(PetscLayoutSetUp(B->rmap));
3897   PetscCall(PetscLayoutSetUp(B->cmap));
3898   m      = B->rmap->n;
3899   cstart = B->cmap->rstart;
3900   cend   = B->cmap->rend;
3901   rstart = B->rmap->rstart;
3902 
3903   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3904 
3905   if (PetscDefined(USE_DEBUG)) {
3906     for (i = 0; i < m; i++) {
3907       nnz = Ii[i + 1] - Ii[i];
3908       JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
3909       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3910       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3911       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3912     }
3913   }
3914 
3915   for (i = 0; i < m; i++) {
3916     nnz     = Ii[i + 1] - Ii[i];
3917     JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
3918     nnz_max = PetscMax(nnz_max, nnz);
3919     d       = 0;
3920     for (j = 0; j < nnz; j++) {
3921       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3922     }
3923     d_nnz[i] = d;
3924     o_nnz[i] = nnz - d;
3925   }
3926   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3927   PetscCall(PetscFree2(d_nnz, o_nnz));
3928 
3929   for (i = 0; i < m; i++) {
3930     ii = i + rstart;
3931     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
3932   }
3933   nooffprocentries    = B->nooffprocentries;
3934   B->nooffprocentries = PETSC_TRUE;
3935   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3936   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3937   B->nooffprocentries = nooffprocentries;
3938 
3939   /* count number of entries below block diagonal */
3940   PetscCall(PetscFree(Aij->ld));
3941   PetscCall(PetscCalloc1(m, &ld));
3942   Aij->ld = ld;
3943   for (i = 0; i < m; i++) {
3944     nnz = Ii[i + 1] - Ii[i];
3945     j   = 0;
3946     while (j < nnz && J[j] < cstart) j++;
3947     ld[i] = j;
3948     if (J) J += nnz;
3949   }
3950 
3951   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3952   PetscFunctionReturn(PETSC_SUCCESS);
3953 }
3954 
3955 /*@
3956   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3957   (the default parallel PETSc format).
3958 
3959   Collective
3960 
3961   Input Parameters:
3962 + B - the matrix
3963 . i - the indices into `j` for the start of each local row (indices start with zero)
3964 . j - the column indices for each local row (indices start with zero)
3965 - v - optional values in the matrix
3966 
3967   Level: developer
3968 
3969   Notes:
3970   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3971   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3972   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3973 
3974   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3975 
3976   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3977 
3978   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3979 
3980   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3981   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3982 
3983   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
3985   as shown
3986 .vb
3987         1 0 0
3988         2 0 3     P0
3989        -------
3990         4 5 6     P1
3991 
3992      Process0 [P0] rows_owned=[0,1]
3993         i =  {0,1,3}  [size = nrow+1  = 2+1]
3994         j =  {0,0,2}  [size = 3]
3995         v =  {1,2,3}  [size = 3]
3996 
3997      Process1 [P1] rows_owned=[2]
3998         i =  {0,3}    [size = nrow+1  = 1+1]
3999         j =  {0,1,2}  [size = 3]
4000         v =  {4,5,6}  [size = 3]
4001 .ve
4002 
4003 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4004           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4005 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on B (MatMPIAIJSetPreallocationCSR_MPIAIJ in this
     file for MATMPIAIJ); PetscTryMethod succeeds silently when B's type has no such method. */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4012 
4013 /*@C
4014   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4015   (the default parallel PETSc format).  For good matrix assembly performance
4016   the user should preallocate the matrix storage by setting the parameters
4017   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4018 
4019   Collective
4020 
4021   Input Parameters:
4022 + B     - the matrix
4023 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4024            (same value is used for all local rows)
4025 . d_nnz - array containing the number of nonzeros in the various rows of the
4026            DIAGONAL portion of the local submatrix (possibly different for each row)
4027            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4028            The size of this array is equal to the number of local rows, i.e 'm'.
4029            For matrices that will be factored, you must leave room for (and set)
4030            the diagonal entry even if it is zero.
4031 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4032            submatrix (same value is used for all local rows).
4033 - o_nnz - array containing the number of nonzeros in the various rows of the
4034            OFF-DIAGONAL portion of the local submatrix (possibly different for
4035            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4036            structure. The size of this array is equal to the number
4037            of local rows, i.e 'm'.
4038 
4039   Example Usage:
4040   Consider the following 8x8 matrix with 34 non-zero values, that is
4041   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4042   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4043   as follows
4044 
4045 .vb
4046             1  2  0  |  0  3  0  |  0  4
4047     Proc0   0  5  6  |  7  0  0  |  8  0
4048             9  0 10  | 11  0  0  | 12  0
4049     -------------------------------------
4050            13  0 14  | 15 16 17  |  0  0
4051     Proc1   0 18  0  | 19 20 21  |  0  0
4052             0  0  0  | 22 23  0  | 24  0
4053     -------------------------------------
4054     Proc2  25 26 27  |  0  0 28  | 29  0
4055            30  0  0  | 31 32 33  |  0 34
4056 .ve
4057 
4058   This can be represented as a collection of submatrices as
4059 .vb
4060       A B C
4061       D E F
4062       G H I
4063 .ve
4064 
4065   Where the submatrices A,B,C are owned by proc0, D,E,F are
4066   owned by proc1, G,H,I are owned by proc2.
4067 
4068   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4069   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4070   The 'M','N' parameters are 8,8, and have the same values on all procs.
4071 
4072   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4073   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4074   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4075   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4076   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4078 
4079   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4080   allocated for every row of the local diagonal submatrix, and `o_nz`
4081   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per local
  row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4084   In this case, the values of `d_nz`, `o_nz` are
4085 .vb
4086      proc0  dnz = 2, o_nz = 2
4087      proc1  dnz = 3, o_nz = 2
4088      proc2  dnz = 1, o_nz = 4
4089 .ve
4090   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4091   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
4093   34 values.
4094 
4095   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4096   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4097   In the above case the values for `d_nnz`, `o_nnz` are
4098 .vb
4099      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4100      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4101      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4102 .ve
4103   Here the space allocated is sum of all the above values i.e 34, and
4104   hence pre-allocation is perfect.
4105 
4106   Level: intermediate
4107 
4108   Notes:
4109   If the *_nnz parameter is given then the *_nz parameter is ignored
4110 
4111   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4112   storage.  The stored row and column indices begin with zero.
4113   See [Sparse Matrices](sec_matsparse) for details.
4114 
4115   The parallel matrix is partitioned such that the first m0 rows belong to
4116   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4117   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4118 
4119   The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
4121   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4122   first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
4124   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4125   common case of a square matrix, the row and column ranges are the same and
4126   the DIAGONAL part is also square. The remaining portion of the local
4127   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4128 
4129   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4130 
4131   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4132   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4133   You can also run with the option `-info` and look for messages with the string
4134   malloc in them to see if additional memory allocation was needed.
4135 
4136 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4137           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4138 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1); /* B must be a valid Mat */
  PetscValidType(B, 1);                        /* its type must be set so the method lookup below can find the implementation */
  /* PetscTryMethod succeeds silently when B's type does not provide this method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4147 
4148 /*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local
  rows in standard CSR format.
4151 
4152   Collective
4153 
4154   Input Parameters:
4155 + comm - MPI communicator
4156 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4157 . n    - This value should be the same as the local size used in creating the
4158          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4159          calculated if `N` is given) For square matrices n is almost always `m`.
4160 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4161 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4162 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4163 . j    - global column indices
4164 - a    - optional matrix values
4165 
4166   Output Parameter:
4167 . mat - the matrix
4168 
4169   Level: intermediate
4170 
4171   Notes:
4172   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4173   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4174   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4175 
4176   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4177 
4178   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4179 
4180   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4181   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4182 
4183   The format which is used for the sparse matrix input, is equivalent to a
4184   row-major ordering, i.e., for the following matrix, the input data expected is
4185   as shown
4186 .vb
4187         1 0 0
4188         2 0 3     P0
4189        -------
4190         4 5 6     P1
4191 
4192      Process0 [P0] rows_owned=[0,1]
4193         i =  {0,1,3}  [size = nrow+1  = 2+1]
4194         j =  {0,0,2}  [size = 3]
4195         v =  {1,2,3}  [size = 3]
4196 
4197      Process1 [P1] rows_owned=[2]
4198         i =  {0,3}    [size = nrow+1  = 1+1]
4199         j =  {0,1,2}  [size = 3]
4200         v =  {4,5,6}  [size = 3]
4201 .ve
4202 
4203 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4204           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4205 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i may be NULL only when there are no local rows; otherwise the CSR row offsets must start at 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies i, j, a into the matrix; also performs the preallocation and the assembly */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4218 
4219 /*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local
  rows in standard CSR format. Only the numerical values are updated; the other arrays must be
  identical to those passed to `MatCreateMPIAIJWithArrays()`
4223 
4224   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4225 
4226   Collective
4227 
4228   Input Parameters:
4229 + mat - the matrix
4230 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4231 . n   - This value should be the same as the local size used in creating the
4232        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4233        calculated if N is given) For square matrices n is almost always m.
4234 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4235 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4236 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4237 . J   - column indices
4238 - v   - matrix values
4239 
4240   Level: deprecated
4241 
4242 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4243           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4244 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* local diagonal block of the parallel matrix */
  PetscScalar    *ad, *ao;                          /* write cursors into the diagonal (A) and off-diagonal (B) value arrays */
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;                      /* row offsets of the diagonal block */
  PetscInt       *ld  = Aij->ld;                    /* per-row count of entries that precede the diagonal block
                                                       (inferred from the copy pattern below; rows must be column-sorted) */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      /* Debug builds verify the row's column indices are strictly increasing,
         since the three-way split below relies on sorted, duplicate-free input */
      for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
        PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
        PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
      }
    }
    nnz = Ii[i + 1] - Ii[i]; /* total entries of this row in v */
    Iii = Ii[i];             /* offset of this row inside v */
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i]; /* entries of this row that belong to the diagonal block */
    /* each row of v is laid out (by increasing column) as:
       [left off-diagonal (ldi) | diagonal block (md) | right off-diagonal (nnz-ldi-md)] */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;       /* advance cursors to the next row's storage */
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE; /* only local values were written, so assembly can skip off-process communication */
  /* NOTE(review): ad/ao have been advanced past the arrays obtained above; restore appears not to use
     the pointer value here — confirm against MatSeqAIJRestoreArrayWrite() semantics */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  /* bump object states so PETSc knows the numerical values changed */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries; /* restore the caller's setting */
  PetscFunctionReturn(PETSC_SUCCESS);
}
4294 
4295 /*@
  MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4297 
4298   Collective
4299 
4300   Input Parameters:
4301 + mat - the matrix
4302 - v   - matrix values, stored by row
4303 
4304   Level: intermediate
4305 
4306   Notes:
4307   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4308 
4309   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4310 
4311 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4312           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4313 @*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* local diagonal block */
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data; /* local off-diagonal block */
  PetscScalar    *ad, *ao;                          /* write cursors into the diagonal and off-diagonal value arrays */
  const PetscInt *Adi = Ad->i, *Adj = Ao->i;        /* row offsets; note Adj holds B's row offsets (Ao->i), not column indices */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld;                     /* per-row count of entries that precede the diagonal block
                                                       (inferred from the copy pattern below; rows must be column-sorted) */

  PetscFunctionBegin;
  m = mat->rmap->n; /* number of local rows */

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset of the current row inside v */
  for (i = 0; i < m; i++) {
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; /* total entries of this row = diagonal + off-diagonal parts */
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i]; /* entries of this row that belong to the diagonal block */
    /* each row of v is laid out (by increasing column) as:
       [left off-diagonal (ldi) | diagonal block (md) | right off-diagonal (nnz-ldi-md)] */
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    ad += md;
    if (ao) { /* B may expose no value array when it holds no entries (e.g. a single process) */
      PetscCall(PetscArraycpy(ao, v + Iii, ldi));
      PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
      ao += nnz - md;
    }
    Iii += nnz;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE; /* only local values were written, so assembly can skip off-process communication */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  /* bump object states so PETSc knows the numerical values changed */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries; /* restore the caller's setting */
  PetscFunctionReturn(PETSC_SUCCESS);
}
4357 
4358 /*@C
4359   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4360   (the default parallel PETSc format).  For good matrix assembly performance
4361   the user should preallocate the matrix storage by setting the parameters
4362   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4363 
4364   Collective
4365 
4366   Input Parameters:
4367 + comm  - MPI communicator
4368 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4369           This value should be the same as the local size used in creating the
4370           y vector for the matrix-vector product y = Ax.
4371 . n     - This value should be the same as the local size used in creating the
4372           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4373           calculated if N is given) For square matrices n is almost always m.
4374 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4375 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4376 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4377           (same value is used for all local rows)
4378 . d_nnz - array containing the number of nonzeros in the various rows of the
4379           DIAGONAL portion of the local submatrix (possibly different for each row)
4380           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4381           The size of this array is equal to the number of local rows, i.e 'm'.
4382 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4383           submatrix (same value is used for all local rows).
4384 - o_nnz - array containing the number of nonzeros in the various rows of the
4385           OFF-DIAGONAL portion of the local submatrix (possibly different for
4386           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4387           structure. The size of this array is equal to the number
4388           of local rows, i.e 'm'.
4389 
4390   Output Parameter:
4391 . A - the matrix
4392 
4393   Options Database Keys:
4394 + -mat_no_inode                     - Do not use inodes
4395 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4396 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4397                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4398                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4399 
4400   Level: intermediate
4401 
4402   Notes:
4403   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4404   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4405   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4406 
4407   If the *_nnz parameter is given then the *_nz parameter is ignored
4408 
4409   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4410   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4411   storage requirements for this matrix.
4412 
4413   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4414   processor than it must be used on all processors that share the object for
4415   that argument.
4416 
4417   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4418   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4419 
4420   The user MUST specify either the local or global matrix dimensions
4421   (possibly both).
4422 
4423   The parallel matrix is partitioned across processors such that the
4424   first `m0` rows belong to process 0, the next `m1` rows belong to
4425   process 1, the next `m2` rows belong to process 2, etc., where
4426   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4427   values corresponding to [m x N] submatrix.
4428 
4429   The columns are logically partitioned with the n0 columns belonging
4430   to 0th partition, the next n1 columns belonging to the next
4431   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4432 
4433   The DIAGONAL portion of the local submatrix on any given processor
4434   is the submatrix corresponding to the rows and columns m,n
4435   corresponding to the given processor. i.e diagonal matrix on
4436   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4437   etc. The remaining portion of the local submatrix [m x (N-n)]
4438   constitute the OFF-DIAGONAL portion. The example below better
4439   illustrates this concept.
4440 
4441   For a square global matrix we define each processor's diagonal portion
4442   to be its local rows and the corresponding columns (a square submatrix);
4443   each processor's off-diagonal portion encompasses the remainder of the
4444   local matrix (a rectangular submatrix).
4445 
4446   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4447 
4448   When calling this routine with a single process communicator, a matrix of
4449   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4450   type of communicator, use the construction mechanism
4451 .vb
4452   MatCreate(..., &A);
4453   MatSetType(A, MATMPIAIJ);
4454   MatSetSizes(A, m, n, M, N);
4455   MatMPIAIJSetPreallocation(A, ...);
4456 .ve
4457 
4458   By default, this format uses inodes (identical nodes) when possible.
4459   We search for consecutive rows with the same nonzero structure, thereby
4460   reusing matrix information to achieve increased efficiency.
4461 
4462   Example Usage:
4463   Consider the following 8x8 matrix with 34 non-zero values, that is
4464   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4465   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4466   as follows
4467 
4468 .vb
4469             1  2  0  |  0  3  0  |  0  4
4470     Proc0   0  5  6  |  7  0  0  |  8  0
4471             9  0 10  | 11  0  0  | 12  0
4472     -------------------------------------
4473            13  0 14  | 15 16 17  |  0  0
4474     Proc1   0 18  0  | 19 20 21  |  0  0
4475             0  0  0  | 22 23  0  | 24  0
4476     -------------------------------------
4477     Proc2  25 26 27  |  0  0 28  | 29  0
4478            30  0  0  | 31 32 33  |  0 34
4479 .ve
4480 
4481   This can be represented as a collection of submatrices as
4482 
4483 .vb
4484       A B C
4485       D E F
4486       G H I
4487 .ve
4488 
4489   Where the submatrices A,B,C are owned by proc0, D,E,F are
4490   owned by proc1, G,H,I are owned by proc2.
4491 
4492   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4493   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4494   The 'M','N' parameters are 8,8, and have the same values on all procs.
4495 
4496   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4497   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4498   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4499   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4500   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another SeqAIJ matrix.
4502 
4503   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4504   allocated for every row of the local diagonal submatrix, and `o_nz`
4505   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4507   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4508   In this case, the values of `d_nz`,`o_nz` are
4509 .vb
4510      proc0  dnz = 2, o_nz = 2
4511      proc1  dnz = 3, o_nz = 2
4512      proc2  dnz = 1, o_nz = 4
4513 .ve
4514   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
4517   34 values.
4518 
4519   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4520   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4521   In the above case the values for d_nnz,o_nnz are
4522 .vb
4523      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4524      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4525      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4526 .ve
4527   Here the space allocated is sum of all the above values i.e 34, and
4528   hence pre-allocation is perfect.
4529 
4530 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4531           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4532           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4533 @*/
4534 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4535 {
4536   PetscMPIInt size;
4537 
4538   PetscFunctionBegin;
4539   PetscCall(MatCreate(comm, A));
4540   PetscCall(MatSetSizes(*A, m, n, M, N));
4541   PetscCallMPI(MPI_Comm_size(comm, &size));
4542   if (size > 1) {
4543     PetscCall(MatSetType(*A, MATMPIAIJ));
4544     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4545   } else {
4546     PetscCall(MatSetType(*A, MATSEQAIJ));
4547     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4548   }
4549   PetscFunctionReturn(PETSC_SUCCESS);
4550 }
4551 
4552 /*MC
4553     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4554 
4555     Synopsis:
4556     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4557 
4558     Not Collective
4559 
4560     Input Parameter:
4561 .   A - the `MATMPIAIJ` matrix
4562 
4563     Output Parameters:
4564 +   Ad - the diagonal portion of the matrix
4565 .   Ao - the off-diagonal portion of the matrix
4566 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4567 -   ierr - error code
4568 
4569      Level: advanced
4570 
4571     Note:
4572     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4573 
4574 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4575 M*/
4576 
4577 /*MC
4578     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4579 
4580     Synopsis:
4581     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4582 
4583     Not Collective
4584 
4585     Input Parameters:
4586 +   A - the `MATMPIAIJ` matrix
4587 .   Ad - the diagonal portion of the matrix
4588 .   Ao - the off-diagonal portion of the matrix
4589 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4590 -   ierr - error code
4591 
4592      Level: advanced
4593 
4594 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4595 M*/
4596 
4597 /*@C
4598   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4599 
4600   Not Collective
4601 
4602   Input Parameter:
4603 . A - The `MATMPIAIJ` matrix
4604 
4605   Output Parameters:
4606 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4607 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4608 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4609 
4610   Level: intermediate
4611 
4612   Note:
4613   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4615   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4616   local column numbers to global column numbers in the original matrix.
4617 
4618   Fortran Notes:
4619   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4620 
4621 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4622 @*/
4623 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4624 {
4625   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4626   PetscBool   flg;
4627 
4628   PetscFunctionBegin;
4629   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4630   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4631   if (Ad) *Ad = a->A;
4632   if (Ao) *Ao = a->B;
4633   if (colmap) *colmap = a->garray;
4634   PetscFunctionReturn(PETSC_SUCCESS);
4635 }
4636 
/* Concatenate each process's sequential matrix (inmat) row-wise into one parallel matrix on comm.
   n is this process's local column count (or PETSC_DECIDE); with MAT_INITIAL_MATRIX the layout and
   preallocation of *outmat are computed first, with reuse only the numeric insertion is performed. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N)); /* inmat is sequential: m,N are this process's contribution */
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* first global row owned by this process = sum of m over lower ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per local row to preallocate exactly */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    /* pick the parallel counterpart of inmat's type (e.g. device variants) */
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* only one of these applies depending on whether *outmat ended up sequential or parallel */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* every process inserts only its own rows, so assembly needs no communication */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart; /* local row i maps to global row i+rstart */
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4688 
4689 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4690 {
4691   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4692 
4693   PetscFunctionBegin;
4694   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4695   PetscCall(PetscFree(merge->id_r));
4696   PetscCall(PetscFree(merge->len_s));
4697   PetscCall(PetscFree(merge->len_r));
4698   PetscCall(PetscFree(merge->bi));
4699   PetscCall(PetscFree(merge->bj));
4700   PetscCall(PetscFree(merge->buf_ri[0]));
4701   PetscCall(PetscFree(merge->buf_ri));
4702   PetscCall(PetscFree(merge->buf_rj[0]));
4703   PetscCall(PetscFree(merge->buf_rj));
4704   PetscCall(PetscFree(merge->coi));
4705   PetscCall(PetscFree(merge->coj));
4706   PetscCall(PetscFree(merge->owners_co));
4707   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4708   PetscCall(PetscFree(merge));
4709   PetscFunctionReturn(PETSC_SUCCESS);
4710 }
4711 
4712 #include <../src/mat/utils/freespace.h>
4713 #include <petscbt.h>
4714 
/* Numeric phase of summing per-process sequential matrices into the parallel matrix mpimat
   previously built by MatCreateMPIAIJSumSeqAIJSymbolic(): each process sends the value segments
   of the rows it does not own to their owners, then every process accumulates its own seqmat
   values plus all received contributions into mpimat, row by row. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj; /* received i-structures (row layout) and j-structures (column indices) */
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i; /* received value buffers; accumulator for one row of mpimat */
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the communication pattern and merged symbolic structure from the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi; /* merged row offsets of this process's rows of mpimat */
  bj     = merge->bj; /* merged (sorted) column indices */
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range; /* owners[p]..owners[p+1]-1 = global rows owned by process p */
  len_s  = merge->len_s;         /* number of values to send to each process */

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* rows destined for [proc] are contiguous in seqmat, so one send covers them all */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i)); /* big enough for any row (at most N columns) */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index of local row i */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge: both aj and bj_i are sorted, and every aj column appears in bj_i */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) { /* message k contributes to this row */
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *nextai[k];
        aa     = abuf_r[k] + *nextai[k];
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r's storage hangs off entry 0 (allocated by PetscPostIrecvScalar); free that first */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4833 
4834 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4835 {
4836   Mat                  B_mpi;
4837   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4838   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4839   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4840   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4841   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4842   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4843   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4844   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4845   MPI_Status          *status;
4846   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4847   PetscBT              lnkbt;
4848   Mat_Merge_SeqsToMPI *merge;
4849   PetscContainer       container;
4850 
4851   PetscFunctionBegin;
4852   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4853 
4854   /* make sure it is a PETSc comm */
4855   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4856   PetscCallMPI(MPI_Comm_size(comm, &size));
4857   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4858 
4859   PetscCall(PetscNew(&merge));
4860   PetscCall(PetscMalloc1(size, &status));
4861 
4862   /* determine row ownership */
4863   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4864   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4865   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4866   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4867   PetscCall(PetscLayoutSetUp(merge->rowmap));
4868   PetscCall(PetscMalloc1(size, &len_si));
4869   PetscCall(PetscMalloc1(size, &merge->len_s));
4870 
4871   m      = merge->rowmap->n;
4872   owners = merge->rowmap->range;
4873 
4874   /* determine the number of messages to send, their lengths */
4875   len_s = merge->len_s;
4876 
4877   len          = 0; /* length of buf_si[] */
4878   merge->nsend = 0;
4879   for (proc = 0; proc < size; proc++) {
4880     len_si[proc] = 0;
4881     if (proc == rank) {
4882       len_s[proc] = 0;
4883     } else {
4884       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4885       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4886     }
4887     if (len_s[proc]) {
4888       merge->nsend++;
4889       nrows = 0;
4890       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4891         if (ai[i + 1] > ai[i]) nrows++;
4892       }
4893       len_si[proc] = 2 * (nrows + 1);
4894       len += len_si[proc];
4895     }
4896   }
4897 
4898   /* determine the number and length of messages to receive for ij-structure */
4899   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4900   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4901 
4902   /* post the Irecv of j-structure */
4903   PetscCall(PetscCommGetNewTag(comm, &tagj));
4904   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4905 
4906   /* post the Isend of j-structure */
4907   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4908 
4909   for (proc = 0, k = 0; proc < size; proc++) {
4910     if (!len_s[proc]) continue;
4911     i = owners[proc];
4912     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4913     k++;
4914   }
4915 
4916   /* receives and sends of j-structure are complete */
4917   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4918   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4919 
4920   /* send and recv i-structure */
4921   PetscCall(PetscCommGetNewTag(comm, &tagi));
4922   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4923 
4924   PetscCall(PetscMalloc1(len + 1, &buf_s));
4925   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4926   for (proc = 0, k = 0; proc < size; proc++) {
4927     if (!len_s[proc]) continue;
4928     /* form outgoing message for i-structure:
4929          buf_si[0]:                 nrows to be sent
4930                [1:nrows]:           row index (global)
4931                [nrows+1:2*nrows+1]: i-structure index
4932     */
4933     nrows       = len_si[proc] / 2 - 1;
4934     buf_si_i    = buf_si + nrows + 1;
4935     buf_si[0]   = nrows;
4936     buf_si_i[0] = 0;
4937     nrows       = 0;
4938     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4939       anzi = ai[i + 1] - ai[i];
4940       if (anzi) {
4941         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4942         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4943         nrows++;
4944       }
4945     }
4946     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4947     k++;
4948     buf_si += len_si[proc];
4949   }
4950 
4951   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4952   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4953 
4954   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4955   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4956 
4957   PetscCall(PetscFree(len_si));
4958   PetscCall(PetscFree(len_ri));
4959   PetscCall(PetscFree(rj_waits));
4960   PetscCall(PetscFree2(si_waits, sj_waits));
4961   PetscCall(PetscFree(ri_waits));
4962   PetscCall(PetscFree(buf_s));
4963   PetscCall(PetscFree(status));
4964 
4965   /* compute a local seq matrix in each processor */
4966   /* allocate bi array and free space for accumulating nonzero column info */
4967   PetscCall(PetscMalloc1(m + 1, &bi));
4968   bi[0] = 0;
4969 
4970   /* create and initialize a linked list */
4971   nlnk = N + 1;
4972   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4973 
4974   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4975   len = ai[owners[rank + 1]] - ai[owners[rank]];
4976   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4977 
4978   current_space = free_space;
4979 
4980   /* determine symbolic info for each local row */
4981   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4982 
4983   for (k = 0; k < merge->nrecv; k++) {
4984     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4985     nrows       = *buf_ri_k[k];
4986     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4987     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4988   }
4989 
4990   MatPreallocateBegin(comm, m, n, dnz, onz);
4991   len = 0;
4992   for (i = 0; i < m; i++) {
4993     bnzi = 0;
4994     /* add local non-zero cols of this proc's seqmat into lnk */
4995     arow = owners[rank] + i;
4996     anzi = ai[arow + 1] - ai[arow];
4997     aj   = a->j + ai[arow];
4998     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4999     bnzi += nlnk;
5000     /* add received col data into lnk */
5001     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5002       if (i == *nextrow[k]) {            /* i-th row */
5003         anzi = *(nextai[k] + 1) - *nextai[k];
5004         aj   = buf_rj[k] + *nextai[k];
5005         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5006         bnzi += nlnk;
5007         nextrow[k]++;
5008         nextai[k]++;
5009       }
5010     }
5011     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5012 
5013     /* if free space is not available, make more free space */
5014     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5015     /* copy data into free space, then initialize lnk */
5016     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5017     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5018 
5019     current_space->array += bnzi;
5020     current_space->local_used += bnzi;
5021     current_space->local_remaining -= bnzi;
5022 
5023     bi[i + 1] = bi[i] + bnzi;
5024   }
5025 
5026   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5027 
5028   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5029   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5030   PetscCall(PetscLLDestroy(lnk, lnkbt));
5031 
5032   /* create symbolic parallel matrix B_mpi */
5033   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5034   PetscCall(MatCreate(comm, &B_mpi));
5035   if (n == PETSC_DECIDE) {
5036     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5037   } else {
5038     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5039   }
5040   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5041   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5042   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5043   MatPreallocateEnd(dnz, onz);
5044   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5045 
5046   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5047   B_mpi->assembled = PETSC_FALSE;
5048   merge->bi        = bi;
5049   merge->bj        = bj;
5050   merge->buf_ri    = buf_ri;
5051   merge->buf_rj    = buf_rj;
5052   merge->coi       = NULL;
5053   merge->coj       = NULL;
5054   merge->owners_co = NULL;
5055 
5056   PetscCall(PetscCommDestroy(&comm));
5057 
5058   /* attach the supporting struct to B_mpi for reuse */
5059   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5060   PetscCall(PetscContainerSetPointer(container, merge));
5061   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5062   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5063   PetscCall(PetscContainerDestroy(&container));
5064   *mpimat = B_mpi;
5065 
5066   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5067   PetscFunctionReturn(PETSC_SUCCESS);
5068 }
5069 
5070 /*@
5071   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5072   matrices from each processor
5073 
5074   Collective
5075 
5076   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix
5079 . m      - number of local rows (or `PETSC_DECIDE`)
5080 . n      - number of local columns (or `PETSC_DECIDE`)
5081 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5082 
5083   Output Parameter:
5084 . mpimat - the parallel matrix generated
5085 
5086   Level: advanced
5087 
5088   Note:
5089   The dimensions of the sequential matrix in each processor MUST be the same.
5090   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5091   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5092 
5093 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5094 @*/
5095 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5096 {
5097   PetscMPIInt size;
5098 
5099   PetscFunctionBegin;
5100   PetscCallMPI(MPI_Comm_size(comm, &size));
5101   if (size == 1) {
5102     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5103     if (scall == MAT_INITIAL_MATRIX) {
5104       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5105     } else {
5106       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5107     }
5108     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5109     PetscFunctionReturn(PETSC_SUCCESS);
5110   }
5111   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5112   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5113   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5114   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5115   PetscFunctionReturn(PETSC_SUCCESS);
5116 }
5117 
5118 /*@
5119   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5120 
5121   Not Collective
5122 
5123   Input Parameter:
5124 . A - the matrix
5125 
5126   Output Parameter:
5127 . A_loc - the local sequential matrix generated
5128 
5129   Level: developer
5130 
5131   Notes:
  The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
  with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
  `n` is the global column count obtained with `MatGetSize()`.
5135 
5136   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5137 
5138   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5139 
5140   Destroy the matrix with `MatDestroy()`
5141 
5142 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5143 @*/
5144 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5145 {
5146   PetscBool mpi;
5147 
5148   PetscFunctionBegin;
5149   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5150   if (mpi) {
5151     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5152   } else {
5153     *A_loc = A;
5154     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5155   }
5156   PetscFunctionReturn(PETSC_SUCCESS);
5157 }
5158 
5159 /*@
5160   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5161 
5162   Not Collective
5163 
5164   Input Parameters:
5165 + A     - the matrix
5166 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5167 
5168   Output Parameter:
5169 . A_loc - the local sequential matrix generated
5170 
5171   Level: developer
5172 
5173   Notes:
5174   The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5176   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5177 
5178   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5179 
5180   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5181   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5182   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5183   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5184 
5185 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5186 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local -> global column index of the off-diagonal block */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and any type whose name begins with "mpiaij" (e.g. device subtypes) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block already holds the entire matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* merged row pointer: nnz of diagonal row plus nnz of off-diagonal row */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* merge each row so that global column indices come out sorted:
       off-diagonal entries left of the diagonal block, then the diagonal block,
       then the remaining off-diagonal entries (garray is sorted) */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A with global column < cstart */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (local columns shifted to global by cstart) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A, right of the diagonal block */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* sparsity pattern is fixed from the initial call: copy only the values,
       walking the same interleaved order used above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5291 
5292 /*@
5293   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5294   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5295 
5296   Not Collective
5297 
5298   Input Parameters:
5299 + A     - the matrix
5300 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5301 
5302   Output Parameters:
5303 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5304 - A_loc - the local sequential matrix generated
5305 
5306   Level: developer
5307 
5308   Note:
5309   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5310   part, then those associated with the off-diagonal part (in its local ordering)
5311 
5312 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5313 @*/
5314 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5315 {
5316   Mat             Ao, Ad;
5317   const PetscInt *cmap;
5318   PetscMPIInt     size;
5319   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5320 
5321   PetscFunctionBegin;
5322   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5323   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5324   if (size == 1) {
5325     if (scall == MAT_INITIAL_MATRIX) {
5326       PetscCall(PetscObjectReference((PetscObject)Ad));
5327       *A_loc = Ad;
5328     } else if (scall == MAT_REUSE_MATRIX) {
5329       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5330     }
5331     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5332     PetscFunctionReturn(PETSC_SUCCESS);
5333   }
5334   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5335   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5336   if (f) {
5337     PetscCall((*f)(A, scall, glob, A_loc));
5338   } else {
5339     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5340     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5341     Mat_SeqAIJ        *c;
5342     PetscInt          *ai = a->i, *aj = a->j;
5343     PetscInt          *bi = b->i, *bj = b->j;
5344     PetscInt          *ci, *cj;
5345     const PetscScalar *aa, *ba;
5346     PetscScalar       *ca;
5347     PetscInt           i, j, am, dn, on;
5348 
5349     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5350     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5351     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5352     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5353     if (scall == MAT_INITIAL_MATRIX) {
5354       PetscInt k;
5355       PetscCall(PetscMalloc1(1 + am, &ci));
5356       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5357       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5358       ci[0] = 0;
5359       for (i = 0, k = 0; i < am; i++) {
5360         const PetscInt ncols_o = bi[i + 1] - bi[i];
5361         const PetscInt ncols_d = ai[i + 1] - ai[i];
5362         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5363         /* diagonal portion of A */
5364         for (j = 0; j < ncols_d; j++, k++) {
5365           cj[k] = *aj++;
5366           ca[k] = *aa++;
5367         }
5368         /* off-diagonal portion of A */
5369         for (j = 0; j < ncols_o; j++, k++) {
5370           cj[k] = dn + *bj++;
5371           ca[k] = *ba++;
5372         }
5373       }
5374       /* put together the new matrix */
5375       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5376       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5377       /* Since these are PETSc arrays, change flags to free them as necessary. */
5378       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5379       c->free_a  = PETSC_TRUE;
5380       c->free_ij = PETSC_TRUE;
5381       c->nonew   = 0;
5382       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5383     } else if (scall == MAT_REUSE_MATRIX) {
5384       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5385       for (i = 0; i < am; i++) {
5386         const PetscInt ncols_d = ai[i + 1] - ai[i];
5387         const PetscInt ncols_o = bi[i + 1] - bi[i];
5388         /* diagonal portion of A */
5389         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5390         /* off-diagonal portion of A */
5391         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5392       }
5393       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5394     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5395     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5396     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5397     if (glob) {
5398       PetscInt cst, *gidx;
5399 
5400       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5401       PetscCall(PetscMalloc1(dn + on, &gidx));
5402       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5403       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5404       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5405     }
5406   }
5407   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5408   PetscFunctionReturn(PETSC_SUCCESS);
5409 }
5410 
5411 /*@C
5412   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5413 
5414   Not Collective
5415 
5416   Input Parameters:
5417 + A     - the matrix
5418 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5419 . row   - index set of rows to extract (or `NULL`)
5420 - col   - index set of columns to extract (or `NULL`)
5421 
5422   Output Parameter:
5423 . A_loc - the local sequential matrix generated
5424 
5425   Level: developer
5426 
5427 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5428 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: owned (diagonal-block) columns plus the nonzero
       off-diagonal columns, merged in ascending global order (garray is sorted) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    /* off-diagonal columns with global index left of the diagonal block */
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    /* the contiguous owned columns */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    /* remaining off-diagonal columns, right of the diagonal block */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5481 
/*
 * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once the row is matched.
 * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-root row counts and offsets, interleaved as (diag, off-diag) pairs
     so a single MPIU_2INT broadcast moves both at once */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* second pair of SFs: one entry per nonzero, to move column indices and values */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is SeqAIJ, so ilocal needs to point into its single contiguous data array */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (in place; undone below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global column indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* restore po->j to local indices; every translated index must map back */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5655 
/*
 * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A.
 * This supports MPIAIJ and MAIJ.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;  /* rows: global rows of P to fetch; map: off-diag col of A -> row of *P_oth */
  PetscHMapI  hamp;       /* hash map used to deduplicate the dof-collapsed column indices */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;    /* star forests cached on *P_oth for cheap value updates on reuse */
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; each key is a global column of A
       collapsed by dof (so MAIJ matrices with dof>1 map dof consecutive columns to one P row) */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense:
       a repeated key must be the one inserted on the immediately preceding step, so
       mapping[i] = count - 1 (the id of the last inserted key) is correct */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    /* Extract the unique keys, sort them, and use them as the (global) rows of P to extract */
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    /* Remember how off-diag columns of A map to rows of *P_oth so callers can translate indices */
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier (composed by MatCreateSeqSubMatrixWithRows_Private).
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place: the two SFs write disjoint slots of p_oth->a, so both
       broadcasts can target the same destination array */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5732 
5733 /*@C
5734   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5735 
5736   Collective
5737 
5738   Input Parameters:
5739 + A     - the first matrix in `MATMPIAIJ` format
5740 . B     - the second matrix in `MATMPIAIJ` format
5741 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5742 
5743   Output Parameters:
5744 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5745 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5746 - B_seq - the sequential matrix generated
5747 
5748   Level: developer
5749 
5750 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5751 @*/
5752 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5753 {
5754   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5755   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5756   IS          isrowb, iscolb;
5757   Mat        *bseq = NULL;
5758 
5759   PetscFunctionBegin;
5760   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5761              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5762   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5763 
5764   if (scall == MAT_INITIAL_MATRIX) {
5765     start = A->cmap->rstart;
5766     cmap  = a->garray;
5767     nzA   = a->A->cmap->n;
5768     nzB   = a->B->cmap->n;
5769     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5770     ncols = 0;
5771     for (i = 0; i < nzB; i++) { /* row < local row index */
5772       if (cmap[i] < start) idx[ncols++] = cmap[i];
5773       else break;
5774     }
5775     imark = i;
5776     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5777     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5778     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5779     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5780   } else {
5781     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5782     isrowb = *rowb;
5783     iscolb = *colb;
5784     PetscCall(PetscMalloc1(1, &bseq));
5785     bseq[0] = *B_seq;
5786   }
5787   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5788   *B_seq = bseq[0];
5789   PetscCall(PetscFree(bseq));
5790   if (!rowb) {
5791     PetscCall(ISDestroy(&isrowb));
5792   } else {
5793     *rowb = isrowb;
5794   }
5795   if (!colb) {
5796     PetscCall(ISDestroy(&iscolb));
5797   } else {
5798     *colb = iscolb;
5799   }
5800   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5801   PetscFunctionReturn(PETSC_SUCCESS);
5802 }
5803 
5804 /*
5805     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5806     of the OFF-DIAGONAL portion of local A
5807 
5808     Collective
5809 
5810    Input Parameters:
5811 +    A,B - the matrices in `MATMPIAIJ` format
5812 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5813 
   Output Parameters:
5815 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5816 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5817 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5818 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5819 
5820     Developer Note:
5821     This directly accesses information inside the VecScatter associated with the matrix-vector product
5822      for this matrix. This is not desirable..
5823 
5824     Level: developer
5825 
5826 */
5827 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5828 {
5829   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5830   Mat_SeqAIJ        *b_oth;
5831   VecScatter         ctx;
5832   MPI_Comm           comm;
5833   const PetscMPIInt *rprocs, *sprocs;
5834   const PetscInt    *srow, *rstarts, *sstarts;
5835   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5836   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5837   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5838   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5839   PetscMPIInt        size, tag, rank, nreqs;
5840 
5841   PetscFunctionBegin;
5842   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5843   PetscCallMPI(MPI_Comm_size(comm, &size));
5844 
5845   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5846              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5847   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5848   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5849 
5850   if (size == 1) {
5851     startsj_s = NULL;
5852     bufa_ptr  = NULL;
5853     *B_oth    = NULL;
5854     PetscFunctionReturn(PETSC_SUCCESS);
5855   }
5856 
5857   ctx = a->Mvctx;
5858   tag = ((PetscObject)ctx)->tag;
5859 
5860   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5861   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5862   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5863   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5864   PetscCall(PetscMalloc1(nreqs, &reqs));
5865   rwaits = reqs;
5866   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5867 
5868   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5869   if (scall == MAT_INITIAL_MATRIX) {
5870     /* i-array */
5871     /*  post receives */
5872     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5873     for (i = 0; i < nrecvs; i++) {
5874       rowlen = rvalues + rstarts[i] * rbs;
5875       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5876       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5877     }
5878 
5879     /* pack the outgoing message */
5880     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5881 
5882     sstartsj[0] = 0;
5883     rstartsj[0] = 0;
5884     len         = 0; /* total length of j or a array to be sent */
5885     if (nsends) {
5886       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5887       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5888     }
5889     for (i = 0; i < nsends; i++) {
5890       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5891       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5892       for (j = 0; j < nrows; j++) {
5893         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5894         for (l = 0; l < sbs; l++) {
5895           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5896 
5897           rowlen[j * sbs + l] = ncols;
5898 
5899           len += ncols;
5900           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5901         }
5902         k++;
5903       }
5904       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5905 
5906       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5907     }
5908     /* recvs and sends of i-array are completed */
5909     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5910     PetscCall(PetscFree(svalues));
5911 
5912     /* allocate buffers for sending j and a arrays */
5913     PetscCall(PetscMalloc1(len + 1, &bufj));
5914     PetscCall(PetscMalloc1(len + 1, &bufa));
5915 
5916     /* create i-array of B_oth */
5917     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5918 
5919     b_othi[0] = 0;
5920     len       = 0; /* total length of j or a array to be received */
5921     k         = 0;
5922     for (i = 0; i < nrecvs; i++) {
5923       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5924       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5925       for (j = 0; j < nrows; j++) {
5926         b_othi[k + 1] = b_othi[k] + rowlen[j];
5927         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5928         k++;
5929       }
5930       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5931     }
5932     PetscCall(PetscFree(rvalues));
5933 
5934     /* allocate space for j and a arrays of B_oth */
5935     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5936     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5937 
5938     /* j-array */
5939     /*  post receives of j-array */
5940     for (i = 0; i < nrecvs; i++) {
5941       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5942       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5943     }
5944 
5945     /* pack the outgoing message j-array */
5946     if (nsends) k = sstarts[0];
5947     for (i = 0; i < nsends; i++) {
5948       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5949       bufJ  = bufj + sstartsj[i];
5950       for (j = 0; j < nrows; j++) {
5951         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5952         for (ll = 0; ll < sbs; ll++) {
5953           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5954           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5955           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5956         }
5957       }
5958       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5959     }
5960 
5961     /* recvs and sends of j-array are completed */
5962     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5963   } else if (scall == MAT_REUSE_MATRIX) {
5964     sstartsj = *startsj_s;
5965     rstartsj = *startsj_r;
5966     bufa     = *bufa_ptr;
5967     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5968     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5969   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5970 
5971   /* a-array */
5972   /*  post receives of a-array */
5973   for (i = 0; i < nrecvs; i++) {
5974     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5975     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5976   }
5977 
5978   /* pack the outgoing message a-array */
5979   if (nsends) k = sstarts[0];
5980   for (i = 0; i < nsends; i++) {
5981     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5982     bufA  = bufa + sstartsj[i];
5983     for (j = 0; j < nrows; j++) {
5984       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5985       for (ll = 0; ll < sbs; ll++) {
5986         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5987         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5988         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5989       }
5990     }
5991     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5992   }
5993   /* recvs and sends of a-array are completed */
5994   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5995   PetscCall(PetscFree(reqs));
5996 
5997   if (scall == MAT_INITIAL_MATRIX) {
5998     /* put together the new matrix */
5999     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6000 
6001     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6002     /* Since these are PETSc arrays, change flags to free them as necessary. */
6003     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6004     b_oth->free_a  = PETSC_TRUE;
6005     b_oth->free_ij = PETSC_TRUE;
6006     b_oth->nonew   = 0;
6007 
6008     PetscCall(PetscFree(bufj));
6009     if (!startsj_s || !bufa_ptr) {
6010       PetscCall(PetscFree2(sstartsj, rstartsj));
6011       PetscCall(PetscFree(bufa_ptr));
6012     } else {
6013       *startsj_s = sstartsj;
6014       *startsj_r = rstartsj;
6015       *bufa_ptr  = bufa;
6016     }
6017   } else if (scall == MAT_REUSE_MATRIX) {
6018     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6019   }
6020 
6021   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6022   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6023   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6024   PetscFunctionReturn(PETSC_SUCCESS);
6025 }
6026 
6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6030 #if defined(PETSC_HAVE_MKL_SPARSE)
6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6032 #endif
6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6035 #if defined(PETSC_HAVE_ELEMENTAL)
6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6037 #endif
6038 #if defined(PETSC_HAVE_SCALAPACK)
6039 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6040 #endif
6041 #if defined(PETSC_HAVE_HYPRE)
6042 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6043 #endif
6044 #if defined(PETSC_HAVE_CUDA)
6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6046 #endif
6047 #if defined(PETSC_HAVE_HIP)
6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6049 #endif
6050 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6052 #endif
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6054 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6055 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6056 
6057 /*
6058     Computes (B'*A')' since computing B*A directly is untenable
6059 
6060                n                       p                          p
6061         [             ]       [             ]         [                 ]
6062       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6063         [             ]       [             ]         [                 ]
6064 
6065 */
6066 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6067 {
6068   Mat At, Bt, Ct;
6069 
6070   PetscFunctionBegin;
6071   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6072   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6073   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6074   PetscCall(MatDestroy(&At));
6075   PetscCall(MatDestroy(&Bt));
6076   PetscCall(MatTransposeSetPrecursor(Ct, C));
6077   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6078   PetscCall(MatDestroy(&Ct));
6079   PetscFunctionReturn(PETSC_SUCCESS);
6080 }
6081 
6082 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6083 {
6084   PetscBool cisdense;
6085 
6086   PetscFunctionBegin;
6087   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6088   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6089   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6090   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6091   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6092   PetscCall(MatSetUp(C));
6093 
6094   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6095   PetscFunctionReturn(PETSC_SUCCESS);
6096 }
6097 
6098 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6099 {
6100   Mat_Product *product = C->product;
6101   Mat          A = product->A, B = product->B;
6102 
6103   PetscFunctionBegin;
6104   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6105              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6106   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6107   C->ops->productsymbolic = MatProductSymbolic_AB;
6108   PetscFunctionReturn(PETSC_SUCCESS);
6109 }
6110 
6111 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6112 {
6113   Mat_Product *product = C->product;
6114 
6115   PetscFunctionBegin;
6116   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6117   PetscFunctionReturn(PETSC_SUCCESS);
6118 }
6119 
6120 /*
6121    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6122 
6123   Input Parameters:
6124 
6125     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6126     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6127 
6128     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6129 
6130     For Set1, j1[] contains column indices of the nonzeros.
6131     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6132     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6133     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6134 
6135     Similar for Set2.
6136 
6137     This routine merges the two sets of nonzeros row by row and removes repeats.
6138 
6139   Output Parameters: (memory is allocated by the caller)
6140 
6141     i[],j[]: the CSR of the merged matrix, which has m rows.
6142     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6143     imap2[]: similar to imap1[], but for Set2.
6144     Note we order nonzeros row-by-row and from left to right.
6145 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* r: row index of mat; m: number of local rows */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Two-pointer merge of the sorted row pieces; repeats within a set are skipped in one
       jump using the repeat counts recorded in jmap1[]/jmap2[] */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Next unique nonzero comes from Set1 only */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Next unique nonzero comes from Set2 only */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of the loops below runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row offset: t unique nonzeros emitted so far */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6203 
6204 /*
6205   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6206 
6207   Input Parameters:
6208     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6209     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6210       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6211 
6212       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6213       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6214 
6215   Output Parameters:
6216     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6217     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6218       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6219       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6220 
6221     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6222       Atot: number of entries belonging to the diagonal block.
6223       Annz: number of unique nonzeros belonging to the diagonal block.
6224       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6225         repeats (i.e., same 'i,j' pair).
6226       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6227         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6232     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6233 
6234     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6235 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows (ignored entries placed at the front by the caller) */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1]; since 0 <= j[p] < cend,
       the shifted values stay strictly negative and below every off-diagonal index */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      /* NOTE(review): the upper bound below looks off by one — valid global columns are
         0..N-1, so '<' rather than '<=' seems intended; confirm before tightening */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sort the row's column indices, dragging perm[] along so value order can be recovered */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* Advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap.
     The counters are reset and recomputed; they reach the same totals as the first pass. */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* p - q repeats of this unique diag nonzero */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); /* p - q repeats of this unique offdiag nonzero */
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6347 
6348 /*
6349   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6350 
6351   Input Parameters:
6352     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6353     nnz:  number of unique nonzeros in the merged matrix
6354     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6355     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6356 
6357   Output Parameter: (memory is allocated by the caller)
6358     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6359 
6360   Example:
6361     nnz1 = 4
6362     nnz  = 6
6363     imap = [1,3,4,5]
6364     jmap = [0,3,5,6,7]
6365    then,
6366     jmap_new = [0,0,3,3,5,6,7]
6367 */
6368 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6369 {
6370   PetscCount k, p;
6371 
6372   PetscFunctionBegin;
6373   jmap_new[0] = 0;
6374   p           = nnz;                /* p loops over jmap_new[] backwards */
6375   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6376     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6377   }
6378   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6379   PetscFunctionReturn(PETSC_SUCCESS);
6380 }
6381 
6382 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6383 {
6384   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6385 
6386   PetscFunctionBegin;
6387   PetscCall(PetscSFDestroy(&coo->sf));
6388   PetscCall(PetscFree(coo->Aperm1));
6389   PetscCall(PetscFree(coo->Bperm1));
6390   PetscCall(PetscFree(coo->Ajmap1));
6391   PetscCall(PetscFree(coo->Bjmap1));
6392   PetscCall(PetscFree(coo->Aimap2));
6393   PetscCall(PetscFree(coo->Bimap2));
6394   PetscCall(PetscFree(coo->Aperm2));
6395   PetscCall(PetscFree(coo->Bperm2));
6396   PetscCall(PetscFree(coo->Ajmap2));
6397   PetscCall(PetscFree(coo->Bjmap2));
6398   PetscCall(PetscFree(coo->Cperm1));
6399   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6400   PetscCall(PetscFree(coo));
6401   PetscFunctionReturn(PETSC_SUCCESS);
6402 }
6403 
6404 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6405 {
6406   MPI_Comm             comm;
6407   PetscMPIInt          rank, size;
6408   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6409   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6410   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6411   PetscContainer       container;
6412   MatCOOStruct_MPIAIJ *coo;
6413 
6414   PetscFunctionBegin;
6415   PetscCall(PetscFree(mpiaij->garray));
6416   PetscCall(VecDestroy(&mpiaij->lvec));
6417 #if defined(PETSC_USE_CTABLE)
6418   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6419 #else
6420   PetscCall(PetscFree(mpiaij->colmap));
6421 #endif
6422   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6423   mat->assembled     = PETSC_FALSE;
6424   mat->was_assembled = PETSC_FALSE;
6425 
6426   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6427   PetscCallMPI(MPI_Comm_size(comm, &size));
6428   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6429   PetscCall(PetscLayoutSetUp(mat->rmap));
6430   PetscCall(PetscLayoutSetUp(mat->cmap));
6431   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6432   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6433   PetscCall(MatGetLocalSize(mat, &m, &n));
6434   PetscCall(MatGetSize(mat, &M, &N));
6435 
6436   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6437   /* entries come first, then local rows, then remote rows.                     */
6438   PetscCount n1 = coo_n, *perm1;
6439   PetscInt  *i1 = coo_i, *j1 = coo_j;
6440 
6441   PetscCall(PetscMalloc1(n1, &perm1));
6442   for (k = 0; k < n1; k++) perm1[k] = k;
6443 
6444   /* Manipulate indices so that entries with negative row or col indices will have smallest
6445      row indices, local entries will have greater but negative row indices, and remote entries
6446      will have positive row indices.
6447   */
6448   for (k = 0; k < n1; k++) {
6449     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6450     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6451     else {
6452       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6453       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6454     }
6455   }
6456 
6457   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6458   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6459 
6460   /* Advance k to the first entry we need to take care of */
6461   for (k = 0; k < n1; k++)
6462     if (i1[k] > PETSC_MIN_INT) break;
6463   PetscInt i1start = k;
6464 
6465   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6466   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6467 
6468   /*           Send remote rows to their owner                                  */
6469   /* Find which rows should be sent to which remote ranks*/
6470   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6471   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6472   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6473   const PetscInt *ranges;
6474   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6475 
6476   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6477   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6478   for (k = rem; k < n1;) {
6479     PetscMPIInt owner;
6480     PetscInt    firstRow, lastRow;
6481 
6482     /* Locate a row range */
6483     firstRow = i1[k]; /* first row of this owner */
6484     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6485     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6486 
6487     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6488     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6489 
6490     /* All entries in [k,p) belong to this remote owner */
6491     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6492       PetscMPIInt *sendto2;
6493       PetscInt    *nentries2;
6494       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6495 
6496       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6497       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6498       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6499       PetscCall(PetscFree2(sendto, nentries2));
6500       sendto   = sendto2;
6501       nentries = nentries2;
6502       maxNsend = maxNsend2;
6503     }
6504     sendto[nsend]   = owner;
6505     nentries[nsend] = p - k;
6506     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6507     nsend++;
6508     k = p;
6509   }
6510 
6511   /* Build 1st SF to know offsets on remote to send data */
6512   PetscSF      sf1;
6513   PetscInt     nroots = 1, nroots2 = 0;
6514   PetscInt     nleaves = nsend, nleaves2 = 0;
6515   PetscInt    *offsets;
6516   PetscSFNode *iremote;
6517 
6518   PetscCall(PetscSFCreate(comm, &sf1));
6519   PetscCall(PetscMalloc1(nsend, &iremote));
6520   PetscCall(PetscMalloc1(nsend, &offsets));
6521   for (k = 0; k < nsend; k++) {
6522     iremote[k].rank  = sendto[k];
6523     iremote[k].index = 0;
6524     nleaves2 += nentries[k];
6525     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6526   }
6527   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6528   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6529   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6530   PetscCall(PetscSFDestroy(&sf1));
6531   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6532 
6533   /* Build 2nd SF to send remote COOs to their owner */
6534   PetscSF sf2;
6535   nroots  = nroots2;
6536   nleaves = nleaves2;
6537   PetscCall(PetscSFCreate(comm, &sf2));
6538   PetscCall(PetscSFSetFromOptions(sf2));
6539   PetscCall(PetscMalloc1(nleaves, &iremote));
6540   p = 0;
6541   for (k = 0; k < nsend; k++) {
6542     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6543     for (q = 0; q < nentries[k]; q++, p++) {
6544       iremote[p].rank  = sendto[k];
6545       iremote[p].index = offsets[k] + q;
6546     }
6547   }
6548   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6549 
6550   /* Send the remote COOs to their owner */
6551   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6552   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6553   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6554   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6555   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6556   PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */
6557   PetscInt *j1prem = j1 ? j1 + rem : NULL;
6558   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6559   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6560   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6561   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6562 
6563   PetscCall(PetscFree(offsets));
6564   PetscCall(PetscFree2(sendto, nentries));
6565 
6566   /* Sort received COOs by row along with the permutation array     */
6567   for (k = 0; k < n2; k++) perm2[k] = k;
6568   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6569 
6570   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6571   PetscCount *Cperm1;
6572   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6573   PetscCount *perm1prem = perm1 ? perm1 + rem : NULL;
6574   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6575   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6576 
6577   /* Support for HYPRE matrices, kind of a hack.
6578      Swap min column with diagonal so that diagonal values will go first */
6579   PetscBool   hypre;
6580   const char *name;
6581   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6582   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6583   if (hypre) {
6584     PetscInt *minj;
6585     PetscBT   hasdiag;
6586 
6587     PetscCall(PetscBTCreate(m, &hasdiag));
6588     PetscCall(PetscMalloc1(m, &minj));
6589     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6590     for (k = i1start; k < rem; k++) {
6591       if (j1[k] < cstart || j1[k] >= cend) continue;
6592       const PetscInt rindex = i1[k] - rstart;
6593       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6594       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6595     }
6596     for (k = 0; k < n2; k++) {
6597       if (j2[k] < cstart || j2[k] >= cend) continue;
6598       const PetscInt rindex = i2[k] - rstart;
6599       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6600       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6601     }
6602     for (k = i1start; k < rem; k++) {
6603       const PetscInt rindex = i1[k] - rstart;
6604       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6605       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6606       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6607     }
6608     for (k = 0; k < n2; k++) {
6609       const PetscInt rindex = i2[k] - rstart;
6610       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6611       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6612       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6613     }
6614     PetscCall(PetscBTDestroy(&hasdiag));
6615     PetscCall(PetscFree(minj));
6616   }
6617 
6618   /* Split local COOs and received COOs into diag/offdiag portions */
6619   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6620   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6621   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6622   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6623   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6624   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6625 
6626   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6627   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6628   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6629   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6630 
6631   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6632   PetscInt *Ai, *Bi;
6633   PetscInt *Aj, *Bj;
6634 
6635   PetscCall(PetscMalloc1(m + 1, &Ai));
6636   PetscCall(PetscMalloc1(m + 1, &Bi));
6637   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6638   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6639 
6640   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6641   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6642   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6643   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6644   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6645 
6646   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6647   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6648 
6649   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6650   /* expect nonzeros in A/B most likely have local contributing entries        */
6651   PetscInt    Annz = Ai[m];
6652   PetscInt    Bnnz = Bi[m];
6653   PetscCount *Ajmap1_new, *Bjmap1_new;
6654 
6655   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6656   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6657 
6658   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6659   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6660 
6661   PetscCall(PetscFree(Aimap1));
6662   PetscCall(PetscFree(Ajmap1));
6663   PetscCall(PetscFree(Bimap1));
6664   PetscCall(PetscFree(Bjmap1));
6665   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6666   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6667   PetscCall(PetscFree(perm1));
6668   PetscCall(PetscFree3(i2, j2, perm2));
6669 
6670   Ajmap1 = Ajmap1_new;
6671   Bjmap1 = Bjmap1_new;
6672 
6673   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6674   if (Annz < Annz1 + Annz2) {
6675     PetscInt *Aj_new;
6676     PetscCall(PetscMalloc1(Annz, &Aj_new));
6677     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6678     PetscCall(PetscFree(Aj));
6679     Aj = Aj_new;
6680   }
6681 
6682   if (Bnnz < Bnnz1 + Bnnz2) {
6683     PetscInt *Bj_new;
6684     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6685     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6686     PetscCall(PetscFree(Bj));
6687     Bj = Bj_new;
6688   }
6689 
6690   /* Create new submatrices for on-process and off-process coupling                  */
6691   PetscScalar     *Aa, *Ba;
6692   MatType          rtype;
6693   Mat_SeqAIJ      *a, *b;
6694   PetscObjectState state;
6695   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6696   PetscCall(PetscCalloc1(Bnnz, &Ba));
6697   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6698   if (cstart) {
6699     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6700   }
6701 
6702   PetscCall(MatGetRootType_Private(mat, &rtype));
6703 
6704   MatSeqXAIJGetOptions_Private(mpiaij->A);
6705   PetscCall(MatDestroy(&mpiaij->A));
6706   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6707   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6708   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6709 
6710   MatSeqXAIJGetOptions_Private(mpiaij->B);
6711   PetscCall(MatDestroy(&mpiaij->B));
6712   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6713   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6714   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6715 
6716   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6717   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6718   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6719   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6720 
6721   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6722   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6723   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6724   a->free_a = b->free_a = PETSC_TRUE;
6725   a->free_ij = b->free_ij = PETSC_TRUE;
6726 
6727   /* conversion must happen AFTER multiply setup */
6728   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6729   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6730   PetscCall(VecDestroy(&mpiaij->lvec));
6731   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6732 
6733   // Put the COO struct in a container and then attach that to the matrix
6734   PetscCall(PetscMalloc1(1, &coo));
6735   coo->n       = coo_n;
6736   coo->sf      = sf2;
6737   coo->sendlen = nleaves;
6738   coo->recvlen = nroots;
6739   coo->Annz    = Annz;
6740   coo->Bnnz    = Bnnz;
6741   coo->Annz2   = Annz2;
6742   coo->Bnnz2   = Bnnz2;
6743   coo->Atot1   = Atot1;
6744   coo->Atot2   = Atot2;
6745   coo->Btot1   = Btot1;
6746   coo->Btot2   = Btot2;
6747   coo->Ajmap1  = Ajmap1;
6748   coo->Aperm1  = Aperm1;
6749   coo->Bjmap1  = Bjmap1;
6750   coo->Bperm1  = Bperm1;
6751   coo->Aimap2  = Aimap2;
6752   coo->Ajmap2  = Ajmap2;
6753   coo->Aperm2  = Aperm2;
6754   coo->Bimap2  = Bimap2;
6755   coo->Bjmap2  = Bjmap2;
6756   coo->Bperm2  = Bperm2;
6757   coo->Cperm1  = Cperm1;
6758   // Allocate in preallocation. If not used, it has zero cost on host
6759   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6760   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6761   PetscCall(PetscContainerSetPointer(container, coo));
6762   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6763   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6764   PetscCall(PetscContainerDestroy(&container));
6765   PetscFunctionReturn(PETSC_SUCCESS);
6766 }
6767 
6768 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6769 {
6770   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6771   Mat                  A = mpiaij->A, B = mpiaij->B;
6772   PetscScalar         *Aa, *Ba;
6773   PetscScalar         *sendbuf, *recvbuf;
6774   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6775   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6776   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6777   const PetscCount    *Cperm1;
6778   PetscContainer       container;
6779   MatCOOStruct_MPIAIJ *coo;
6780 
6781   PetscFunctionBegin;
6782   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6783   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6784   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6785   sendbuf = coo->sendbuf;
6786   recvbuf = coo->recvbuf;
6787   Ajmap1  = coo->Ajmap1;
6788   Ajmap2  = coo->Ajmap2;
6789   Aimap2  = coo->Aimap2;
6790   Bjmap1  = coo->Bjmap1;
6791   Bjmap2  = coo->Bjmap2;
6792   Bimap2  = coo->Bimap2;
6793   Aperm1  = coo->Aperm1;
6794   Aperm2  = coo->Aperm2;
6795   Bperm1  = coo->Bperm1;
6796   Bperm2  = coo->Bperm2;
6797   Cperm1  = coo->Cperm1;
6798 
6799   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6800   PetscCall(MatSeqAIJGetArray(B, &Ba));
6801 
6802   /* Pack entries to be sent to remote */
6803   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6804 
6805   /* Send remote entries to their owner and overlap the communication with local computation */
6806   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6807   /* Add local entries to A and B */
6808   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6809     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6810     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6811     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6812   }
6813   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6814     PetscScalar sum = 0.0;
6815     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6816     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6817   }
6818   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6819 
6820   /* Add received remote entries to A and B */
6821   for (PetscCount i = 0; i < coo->Annz2; i++) {
6822     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6823   }
6824   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6825     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6826   }
6827   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6828   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6829   PetscFunctionReturn(PETSC_SUCCESS);
6830 }
6831 
6832 /*MC
6833    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6834 
6835    Options Database Keys:
6836 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6837 
6838    Level: beginner
6839 
6840    Notes:
6841    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6842     in this case the values associated with the rows and columns one passes in are set to zero
6843     in the matrix
6844 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6847 
6848 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6849 M*/
/* MatCreate_MPIAIJ - type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the
   operation table, creates the stash for off-process entries, and composes the type-specific
   methods and conversion routines queried by name elsewhere in the library. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* struct assignment copies the entire function table */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* built lazily; maps global columns of B to local columns */
  b->garray      = NULL; /* built lazily; global indices of B's columns */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Type-specific methods dispatched by composed-function name */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other matrix types; device/back-end ones are compiled in only when configured */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6932 
6933 /*@C
6934   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6935   and "off-diagonal" part of the matrix in CSR format.
6936 
6937   Collective
6938 
6939   Input Parameters:
6940 + comm - MPI communicator
6941 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6942 . n    - This value should be the same as the local size used in creating the
6943          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6944          calculated if `N` is given) For square matrices `n` is almost always `m`.
6945 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6946 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6947 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6948 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6949 . a    - matrix values
6950 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6951 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6952 - oa   - matrix values
6953 
6954   Output Parameter:
6955 . mat - the matrix
6956 
6957   Level: advanced
6958 
6959   Notes:
6960   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6961   must free the arrays once the matrix has been destroyed and not before.
6962 
6963   The `i` and `j` indices are 0 based
6964 
6965   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6966 
6967   This sets local rows and cannot be used to set off-processor values.
6968 
  Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
  legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
  not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
  the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
  keep track of the underlying arrays. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
  communication if it is known that only local entries will be set.
6975 
6976 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6977           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6978 @*/
6979 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6980 {
6981   Mat_MPIAIJ *maij;
6982 
6983   PetscFunctionBegin;
6984   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6985   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6986   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6987   PetscCall(MatCreate(comm, mat));
6988   PetscCall(MatSetSizes(*mat, m, n, M, N));
6989   PetscCall(MatSetType(*mat, MATMPIAIJ));
6990   maij = (Mat_MPIAIJ *)(*mat)->data;
6991 
6992   (*mat)->preallocated = PETSC_TRUE;
6993 
6994   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6995   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6996 
6997   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6998   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6999 
7000   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7001   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7002   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7003   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7004   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7005   PetscFunctionReturn(PETSC_SUCCESS);
7006 }
7007 
/* Backend data attached to the product C; holds the intermediate sequential
   products and the COO machinery used to assemble their values into C. */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r; /* send/receive offsets reused on MAT_REUSE_MATRIX updates */
  PetscScalar *bufa;                  /* communication buffer for P_oth values */
  Mat          P_oth;                 /* rows of P corresponding to A's off-diagonal columns */

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory type of the COO buffers (host/CUDA/HIP/Kokkos) */

  /* customization */
  PetscBool abmerge;    /* for AB: multiply A_diag by the merged local part of B */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
7038 
7039 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7040 {
7041   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7042   PetscInt             i;
7043 
7044   PetscFunctionBegin;
7045   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7046   PetscCall(PetscFree(mmdata->bufa));
7047   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7048   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7049   PetscCall(MatDestroy(&mmdata->P_oth));
7050   PetscCall(MatDestroy(&mmdata->Bloc));
7051   PetscCall(PetscSFDestroy(&mmdata->sf));
7052   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7053   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7054   PetscCall(PetscFree(mmdata->own[0]));
7055   PetscCall(PetscFree(mmdata->own));
7056   PetscCall(PetscFree(mmdata->off[0]));
7057   PetscCall(PetscFree(mmdata->off));
7058   PetscCall(PetscFree(mmdata));
7059   PetscFunctionReturn(PETSC_SUCCESS);
7060 }
7061 
7062 /* Copy selected n entries with indices in idx[] of A to v[].
7063    If idx is NULL, copy the whole data array of A to v[]
7064  */
7065 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7066 {
7067   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7068 
7069   PetscFunctionBegin;
7070   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7071   if (f) {
7072     PetscCall((*f)(A, n, idx, v));
7073   } else {
7074     const PetscScalar *vv;
7075 
7076     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7077     if (n && idx) {
7078       PetscScalar    *w  = v;
7079       const PetscInt *oi = idx;
7080       PetscInt        j;
7081 
7082       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7083     } else {
7084       PetscCall(PetscArraycpy(v, vv, n));
7085     }
7086     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7087   }
7088   PetscFunctionReturn(PETSC_SUCCESS);
7089 }
7090 
/* Numeric phase: refresh the intermediate sequential products, then scatter their
   values into C through the COO assembly prepared by the symbolic phase. */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* reusesym only skips the very first numeric update after an api_user symbolic call */
  mmdata->reusesym = PETSC_FALSE;

  /* recompute every intermediate product numerically */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* pack the values: off-process entries into coo_w (send buffer), on-process into coo_v */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; /* # of entries mp[i] sends to other ranks */

    if (mmdata->mptmp[i]) continue; /* temporary products feed other products, not C */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; /* # of entries mp[i] inserts locally */

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries: copy mp[i]'s full value array in one go */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* received remote values land at the tail of coo_v, matching the COO index layout */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7135 
/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase: decompose the MPI product into at most MAX_NUMBER_INTERMEDIATE
   sequential products, then build the COO index arrays through which the numeric
   phase will assemble their values (possibly across processes) into C. */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* treat AtB with symmetric A as AB, which avoids the off-process row scatter */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine C's sizes and whether values must be scattered to other ranks */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* a single rank owns everything */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* create the symbolic intermediate products; for each, record how its local
     row/col indices map to C's global indices (rmapt/cmapt and rmapa/cmapa) */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth only feeds the next product below */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* build an SF mapping the off-process entries onto their owner ranks */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF still provides the memory-type-aware PetscSFMalloc/PetscSFFree used elsewhere */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7632 
/*
  MatProductSetFromOptions_MPIAIJBACKEND - choose the device-backend symbolic phase for a matrix product when possible

  Selects MatProductSymbolic_MPIAIJBACKEND for AB, AtB and PtAP products when both operands have
  the same type and (with device support) neither is bound to the CPU. Per-product command-line
  options (e.g. -matmatmult_backend_cpu, -mat_product_algorithm_backend_cpu) let the user force the
  CPU path. If the backend is not selected, falls back to the standard MPIAIJ product dispatch.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  /* without device support the backend symbolic routine is always acceptable */
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* only consider the backend when A and B have the same concrete type and neither is pinned to the CPU */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    /* the user explicitly asked for the CPU implementation */
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7703 
7704 /*
7705    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7706 
7707    n - the number of block indices in cc[]
7708    cc - the block indices (must be large enough to contain the indices)
7709 */
7710 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7711 {
7712   PetscInt        cnt = -1, nidx, j;
7713   const PetscInt *idx;
7714 
7715   PetscFunctionBegin;
7716   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7717   if (nidx) {
7718     cnt     = 0;
7719     cc[cnt] = idx[0] / bs;
7720     for (j = 1; j < nidx; j++) {
7721       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7722     }
7723   }
7724   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7725   *n = cnt + 1;
7726   PetscFunctionReturn(PETSC_SUCCESS);
7727 }
7728 
7729 /*
7730     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7731 
7732     ncollapsed - the number of block indices
7733     collapsed - the block indices (must be large enough to contain the indices)
7734 */
7735 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7736 {
7737   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7738 
7739   PetscFunctionBegin;
7740   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7741   for (i = start + 1; i < start + bs; i++) {
7742     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7743     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7744     cprevtmp = cprev;
7745     cprev    = merged;
7746     merged   = cprevtmp;
7747   }
7748   *ncollapsed = nprev;
7749   if (collapsed) *collapsed = cprev;
7750   PetscFunctionReturn(PETSC_SUCCESS);
7751 }
7752 
7753 /*
7754  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7755 
7756  Input Parameter:
7757  . Amat - matrix
7758  - symmetrize - make the result symmetric
7759  + scale - scale with diagonal
7760 
7761  Output Parameter:
7762  . a_Gmat - output scalar graph >= 0
7763 
7764 */
7765 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7766 {
7767   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7768   MPI_Comm  comm;
7769   Mat       Gmat;
7770   PetscBool ismpiaij, isseqaij;
7771   Mat       a, b, c;
7772   MatType   jtype;
7773 
7774   PetscFunctionBegin;
7775   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7776   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7777   PetscCall(MatGetSize(Amat, &MM, &NN));
7778   PetscCall(MatGetBlockSize(Amat, &bs));
7779   nloc = (Iend - Istart) / bs;
7780 
7781   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7782   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7783   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7784 
7785   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7786   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7787      implementation */
7788   if (bs > 1) {
7789     PetscCall(MatGetType(Amat, &jtype));
7790     PetscCall(MatCreate(comm, &Gmat));
7791     PetscCall(MatSetType(Gmat, jtype));
7792     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7793     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7794     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7795       PetscInt  *d_nnz, *o_nnz;
7796       MatScalar *aa, val, *AA;
7797       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7798       if (isseqaij) {
7799         a = Amat;
7800         b = NULL;
7801       } else {
7802         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7803         a             = d->A;
7804         b             = d->B;
7805       }
7806       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7807       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7808       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7809         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7810         const PetscInt *cols1, *cols2;
7811         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7812           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7813           nnz[brow / bs] = nc2 / bs;
7814           if (nc2 % bs) ok = 0;
7815           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7816           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7817             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7818             if (nc1 != nc2) ok = 0;
7819             else {
7820               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7821                 if (cols1[jj] != cols2[jj]) ok = 0;
7822                 if (cols1[jj] % bs != jj % bs) ok = 0;
7823               }
7824             }
7825             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7826           }
7827           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7828           if (!ok) {
7829             PetscCall(PetscFree2(d_nnz, o_nnz));
7830             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7831             goto old_bs;
7832           }
7833         }
7834       }
7835       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7836       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7837       PetscCall(PetscFree2(d_nnz, o_nnz));
7838       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7839       // diag
7840       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7841         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7842         ai               = aseq->i;
7843         n                = ai[brow + 1] - ai[brow];
7844         aj               = aseq->j + ai[brow];
7845         for (int k = 0; k < n; k += bs) {        // block columns
7846           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7847           val        = 0;
7848           if (index_size == 0) {
7849             for (int ii = 0; ii < bs; ii++) { // rows in block
7850               aa = aseq->a + ai[brow + ii] + k;
7851               for (int jj = 0; jj < bs; jj++) {         // columns in block
7852                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7853               }
7854             }
7855           } else {                                       // use (index,index) value if provided
7856             for (int iii = 0; iii < index_size; iii++) { // rows in block
7857               int ii = index[iii];
7858               aa     = aseq->a + ai[brow + ii] + k;
7859               for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
7860                 int jj = index[jjj];
7861                 val += PetscAbs(PetscRealPart(aa[jj]));
7862               }
7863             }
7864           }
7865           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7866           AA[k / bs] = val;
7867         }
7868         grow = Istart / bs + brow / bs;
7869         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7870       }
7871       // off-diag
7872       if (ismpiaij) {
7873         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7874         const PetscScalar *vals;
7875         const PetscInt    *cols, *garray = aij->garray;
7876         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7877         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7878           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7879           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7880             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7881             AA[k / bs] = 0;
7882             AJ[cidx]   = garray[cols[k]] / bs;
7883           }
7884           nc = ncols / bs;
7885           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7886           if (index_size == 0) {
7887             for (int ii = 0; ii < bs; ii++) { // rows in block
7888               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7889               for (int k = 0; k < ncols; k += bs) {
7890                 for (int jj = 0; jj < bs; jj++) { // cols in block
7891                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7892                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7893                 }
7894               }
7895               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7896             }
7897           } else {                                       // use (index,index) value if provided
7898             for (int iii = 0; iii < index_size; iii++) { // rows in block
7899               int ii = index[iii];
7900               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7901               for (int k = 0; k < ncols; k += bs) {
7902                 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
7903                   int jj = index[jjj];
7904                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7905                 }
7906               }
7907               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7908             }
7909           }
7910           grow = Istart / bs + brow / bs;
7911           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7912         }
7913       }
7914       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7915       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7916       PetscCall(PetscFree2(AA, AJ));
7917     } else {
7918       const PetscScalar *vals;
7919       const PetscInt    *idx;
7920       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7921     old_bs:
7922       /*
7923        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7924        */
7925       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7926       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7927       if (isseqaij) {
7928         PetscInt max_d_nnz;
7929         /*
7930          Determine exact preallocation count for (sequential) scalar matrix
7931          */
7932         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7933         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7934         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7935         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7936         PetscCall(PetscFree3(w0, w1, w2));
7937       } else if (ismpiaij) {
7938         Mat             Daij, Oaij;
7939         const PetscInt *garray;
7940         PetscInt        max_d_nnz;
7941         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7942         /*
7943          Determine exact preallocation count for diagonal block portion of scalar matrix
7944          */
7945         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7946         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7947         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7948         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7949         PetscCall(PetscFree3(w0, w1, w2));
7950         /*
7951          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7952          */
7953         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7954           o_nnz[jj] = 0;
7955           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7956             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7957             o_nnz[jj] += ncols;
7958             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7959           }
7960           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7961         }
7962       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7963       /* get scalar copy (norms) of matrix */
7964       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7965       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7966       PetscCall(PetscFree2(d_nnz, o_nnz));
7967       for (Ii = Istart; Ii < Iend; Ii++) {
7968         PetscInt dest_row = Ii / bs;
7969         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7970         for (jj = 0; jj < ncols; jj++) {
7971           PetscInt    dest_col = idx[jj] / bs;
7972           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7973           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7974         }
7975         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7976       }
7977       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7978       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7979     }
7980   } else {
7981     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7982     else {
7983       Gmat = Amat;
7984       PetscCall(PetscObjectReference((PetscObject)Gmat));
7985     }
7986     if (isseqaij) {
7987       a = Gmat;
7988       b = NULL;
7989     } else {
7990       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7991       a             = d->A;
7992       b             = d->B;
7993     }
7994     if (filter >= 0 || scale) {
7995       /* take absolute value of each entry */
7996       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7997         MatInfo      info;
7998         PetscScalar *avals;
7999         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8000         PetscCall(MatSeqAIJGetArray(c, &avals));
8001         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8002         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8003       }
8004     }
8005   }
8006   if (symmetrize) {
8007     PetscBool isset, issym;
8008     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8009     if (!isset || !issym) {
8010       Mat matTrans;
8011       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8012       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8013       PetscCall(MatDestroy(&matTrans));
8014     }
8015     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8016   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8017   if (scale) {
8018     /* scale c for all diagonal values = 1 or -1 */
8019     Vec diag;
8020     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8021     PetscCall(MatGetDiagonal(Gmat, diag));
8022     PetscCall(VecReciprocal(diag));
8023     PetscCall(VecSqrtAbs(diag));
8024     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8025     PetscCall(VecDestroy(&diag));
8026   }
8027   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8028 
8029   if (filter >= 0) {
8030     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8031     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8032   }
8033   *a_Gmat = Gmat;
8034   PetscFunctionReturn(PETSC_SUCCESS);
8035 }
8036 
8037 /*
8038     Special version for direct calls from Fortran
8039 */
8040 
8041 /* Change these macros so can be used in void function */
8042 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8043 #undef PetscCall
8044 #define PetscCall(...) \
8045   do { \
8046     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8047     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8048       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8049       return; \
8050     } \
8051   } while (0)
8052 
8053 #undef SETERRQ
8054 #define SETERRQ(comm, ierr, ...) \
8055   do { \
8056     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8057     return; \
8058   } while (0)
8059 
8060 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8061   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8062 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8063   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8064 #else
8065 #endif
/*
  matsetvaluesmpiaij_ - Fortran-callable MatSetValues() for MPIAIJ matrices, inlined for speed
  (bypasses the usual function-table dispatch). Errors are reported through *_ierr via the
  PetscCall()/SETERRQ() macros redefined above for use in a void function.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* scratch variables consumed by the MatSetValues_SeqAIJ_{A,B}_Private() insertion macros;
       the exact names are required by those macros */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      /* negative row indices are silently ignored */
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: cache row pointers/search state for both the diagonal (1) and off-diagonal (2) parts */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate the global column through the colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                /* column not present in the assembled pattern: disassemble so B uses global indices again */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8179 
8180 /* Undefining these here since they were redefined from their original definition above! No
8181  * other PETSc functions should be defined past this point, as it is impossible to recover the
8182  * original definitions */
8183 #undef PetscCall
8184 #undef SETERRQ
8185