xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8564c97f3fe574910f676ffb31bf76fa44548916)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
/* MatGetRowIJ_MPIAIJ - return CSR index arrays (ia/ja) for the merged local rows of an MPIAIJ matrix.

   A merged sequential copy B of the local rows is built with MatMPIAIJGetLocalMat() and
   composed onto A under the key "MatGetRowIJ_MPIAIJ" so that MatRestoreRowIJ_MPIAIJ()
   can find it later. The MatDestroy() below only drops this function's reference;
   the compose keeps B alive until the restore removes it. */
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
100 
/* MatRestoreRowIJ_MPIAIJ - counterpart of MatGetRowIJ_MPIAIJ().

   Retrieves the merged local matrix B that the get routine composed onto A,
   restores its ia/ja arrays, then clears the composition (which releases the
   last reference to B). Assumes MatGetRowIJ_MPIAIJ() was called first;
   otherwise the query yields NULL — TODO confirm callers guarantee pairing. */
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
  enough exist.
129 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
/* MatBindToCPU_MPIAIJ - bind (or unbind) the matrix and its internal pieces to the CPU.

   Propagates the flag to the diagonal (A) and off-diagonal (B) sequential blocks and
   to the work vectors used in matrix-vector products so their placement always
   matches the parent matrix. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  /* the flag is only recorded when a device backend is configured; otherwise it is irrelevant */
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}
168 
/* MatSetBlockSizes_MPIAIJ - set row/column block sizes on the internal sequential blocks.

   The diagonal block A inherits both block sizes; the off-diagonal block B always gets
   a column block size of 1 — presumably because B's columns are the compressed
   (garray-indexed) set and do not follow the global column blocking; verify against
   the MPIAIJ storage layout. No-op when the blocks have not been created yet. */
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
/* MatGetColumnReductions_MPIAIJ - compute a per-column reduction (1/2/inf norm, or
   sum/mean of real or imaginary parts) over all entries of the matrix.

   reductions[] must have length equal to the global number of columns; a work
   array of the same global length is allocated on every process, so this is not
   scalable in the number of columns. Diagonal-block column indices are shifted by
   cmap->rstart; off-diagonal indices are mapped to global ones through garray. */
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore the value arrays purely for their side effect of making the host copy
     current, since a_aij->a and b_aij->a are read directly below */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* |a*a| equals |a|^2 also for complex scalars */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the per-process partial results: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* means divide by the global number of rows */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
324 
/* MatFindOffBlockDiagonalEntries_MPIAIJ - build an IS of the global indices of the
   locally owned rows that have entries outside the block diagonal.

   Rows with off-block-diagonal entries in the diagonal block A are found by the
   recursive MatFindOffBlockDiagonalEntries(); any row with an entry in the
   off-diagonal block B qualifies by construction, so MatFindNonzeroRows() on B
   suffices there. The two local-index lists are concatenated, sorted with
   duplicates removed, and shifted to global numbering. */
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* merge both candidate lists, then sort and de-duplicate in place */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}
355 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash-table cost; without it, it is not scalable (each
process holds an order-N integer array) but access is fast.
*/
/* MatCreateColmap_MPIAIJ_Private - build aij->colmap, mapping a global column number
   to its local index in the off-diagonal block B. Entries are stored shifted by +1
   so that a lookup result of 0 means "column not present" (callers subtract 1). */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash-table variant: memory proportional to the number of off-diagonal columns */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense variant: one slot per global column; zero-initialized so 0 == "not present" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
379 
/* MatSetValues_SeqAIJ_A_Private - insert or add one (row,col,value) entry into the
   diagonal block A. Used only from MatSetValues_MPIAIJ(); relies on its locals
   (rp1/ap1 row pointers, low1/high1 binary-search bounds, nrow1, rmax1, lastcol1,
   nonew, ignorezeroentries, a, A, am, aa/ai/aj/aimax/ailen, N, _i, t) being in
   scope. orow/ocol are the original global indices, used only in error messages. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
425 
/* MatSetValues_SeqAIJ_B_Private - insert or add one (row,col,value) entry into the
   off-diagonal block B. Mirror of the A variant above, using the *2-suffixed locals
   of MatSetValues_MPIAIJ(). Note the zero-entry skip has no row != col test here:
   off-diagonal entries are never on the matrix diagonal. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
470 
/* MatSetValuesRow_MPIAIJ - overwrite all stored values of one (global) row.

   v[] holds the row's values in global column order: entries left of the
   diagonal block, then the diagonal block, then entries right of it. The split
   point l is found by scanning B's columns (mapped through garray) against the
   ownership start. Note: as stated below, this layout logic only works for
   square matrices, where the row and column ownership ranges coincide. */
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break; /* first B column past the ownership start */
  }
  /* copy the l leading values into the left part of the off-diagonal block */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
508 
/* MatSetValues_MPIAIJ - insert or add an m-by-n logically dense block of values at
   global row indices im[] and column indices in[].

   Locally owned rows are split column-wise between the diagonal block A (columns in
   [cstart,cend)) and the off-diagonal block B (everything else); rows owned by other
   processes are queued in the matrix stash for communication during assembly.
   The per-entry work is done by the MatSetValues_SeqAIJ_{A,B}_Private() macros,
   which read and update many of the locals declared below. Negative row or column
   indices are silently skipped. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row indices are ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* row is owned by this process */
      row      = im[i] - rstart;
      /* set up the A-block (diagonal) search state used by the insertion macro */
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      /* and the B-block (off-diagonal) search state */
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) { /* negative column indices are ignored */
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1; /* colmap stores local index + 1; 0 means absent */
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal nonzero but B's nonew forbids insertion: warn or error */
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash it for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
617 
618 /*
619     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
620     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
621     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
622 */
623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
624 {
625   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
626   Mat         A      = aij->A; /* diagonal part of the matrix */
627   Mat         B      = aij->B; /* off-diagonal part of the matrix */
628   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
629   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
630   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
631   PetscInt   *ailen = a->ilen, *aj = a->j;
632   PetscInt   *bilen = b->ilen, *bj = b->j;
633   PetscInt    am          = aij->A->rmap->n, j;
634   PetscInt    diag_so_far = 0, dnz;
635   PetscInt    offd_so_far = 0, onz;
636 
637   PetscFunctionBegin;
638   /* Iterate over all rows of the matrix */
639   for (j = 0; j < am; j++) {
640     dnz = onz = 0;
641     /*  Iterate over all non-zero columns of the current row */
642     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
643       /* If column is in the diagonal */
644       if (mat_j[col] >= cstart && mat_j[col] < cend) {
645         aj[diag_so_far++] = mat_j[col] - cstart;
646         dnz++;
647       } else { /* off-diagonal entries */
648         bj[offd_so_far++] = mat_j[col];
649         onz++;
650       }
651     }
652     ailen[j] = dnz;
653     bilen[j] = onz;
654   }
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 /*
659     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
660     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
661     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
662     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
663     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
664 */
665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
666 {
667   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
668   Mat          A    = aij->A; /* diagonal part of the matrix */
669   Mat          B    = aij->B; /* off-diagonal part of the matrix */
670   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
671   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
672   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
673   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
674   PetscInt    *ailen = a->ilen, *aj = a->j;
675   PetscInt    *bilen = b->ilen, *bj = b->j;
676   PetscInt     am          = aij->A->rmap->n, j;
677   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
678   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
679   PetscScalar *aa = a->a, *ba = b->a;
680 
681   PetscFunctionBegin;
682   /* Iterate over all rows of the matrix */
683   for (j = 0; j < am; j++) {
684     dnz_row = onz_row = 0;
685     rowstart_offd     = full_offd_i[j];
686     rowstart_diag     = full_diag_i[j];
687     /*  Iterate over all non-zero columns of the current row */
688     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
689       /* If column is in the diagonal */
690       if (mat_j[col] >= cstart && mat_j[col] < cend) {
691         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
692         aa[rowstart_diag + dnz_row] = mat_a[col];
693         dnz_row++;
694       } else { /* off-diagonal entries */
695         bj[rowstart_offd + onz_row] = mat_j[col];
696         ba[rowstart_offd + onz_row] = mat_a[col];
697         onz_row++;
698       }
699     }
700     ailen[j] = dnz_row;
701     bilen[j] = onz_row;
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
705 
/*
   Retrieve a dense m x n block of values: v[i*n + j] = mat(idxm[i], idxn[j]).

   Only rows owned by this process may be requested (enforced below); columns
   may refer to any part of the matrix.  Off-diagonal columns are translated
   to local indices of the B block via aij->colmap / aij->garray; a column
   absent from the off-diagonal nonzero structure yields 0.0.  Negative row or
   column indices are skipped and their entries of v are left untouched.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        /* column lies in the diagonal block: shift to its local numbering */
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* off-diagonal block: build the global-to-local column map on first use */
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--; /* colmap stores indices shifted by one so 0 can mean "absent" */
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* column not present in the off-diagonal nonzero structure -> 0 */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
739 
740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
741 {
742   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
743   PetscInt    nstash, reallocs;
744 
745   PetscFunctionBegin;
746   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
747 
748   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
749   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
750   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
751   PetscFunctionReturn(PETSC_SUCCESS);
752 }
753 
/*
   Complete assembly of an MPIAIJ matrix:
     1) drain the stash, inserting values other ranks set for our rows;
     2) assemble the diagonal (A) block;
     3) agree globally on whether any rank disassembled its off-diagonal B
        block and, if so, disassemble locally too so reassembly is consistent;
     4) on first final assembly, build the column map and scatter context;
     5) assemble B, free row-access scratch data, and reduce a global
        nonzero-state so all ranks agree on the pattern.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries destined for this rank, one message at a time */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* scratch arrays used by row-access routines are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal vector is invalidated by the new values */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
832 
833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
834 {
835   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
836 
837   PetscFunctionBegin;
838   PetscCall(MatZeroEntries(l->A));
839   PetscCall(MatZeroEntries(l->B));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
/*
   Zero the globally numbered rows[] and optionally place 'diag' on the
   diagonal of each zeroed row; when x and b are supplied, b is fixed so
   that b_row = diag * x_row for every zeroed local row.

   Rows owned by other ranks are routed to their owners by
   MatZeroRowsMapLocal_Private(), which returns the local row indices.
*/
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* congruent layouts: the diagonal entry lives in the diagonal block, so
     MatZeroRows on mat->A can place it directly */
  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved 'nonew' flags, restored below */
    PetscBool   nnzA, nnzB; /* NOTE: despite the name these hold keepnonzeropattern, not nonzero counts */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* rows beyond the last column have no diagonal entry to set */
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  /* finish any insertions made above and refresh cached data structures */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
913 
/*
   Zero the listed global rows AND the corresponding columns, keeping 'diag'
   on the diagonal of the zeroed rows.  When x and b are supplied, b is
   adjusted (b_i -= a_ij * x_j for zeroed columns j) so the reduced system
   stays consistent.

   The row list may name rows owned by any rank: an SF reduction flags each
   locally owned row that any rank requested.  Column zeroing in the
   off-diagonal block is done through a 0/1 mask vector scattered to ghost
   positions.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* NOTE(review): narrows PetscInt rmap->n to an int — assumes local size fits; confirm */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed (const cast: reduce only reads the leaf data) */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* mark zeroed rows with 1 in xmask, then scatter so ghost columns see the flags */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* only rows with nonzeros are stored; ridx maps back to true row numbers */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column was zeroed: move its contribution to the rhs, then drop it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1031 
1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1033 {
1034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035   PetscInt    nt;
1036   VecScatter  Mvctx = a->Mvctx;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(VecGetLocalSize(xx, &nt));
1040   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1041   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->A, mult, xx, yy);
1043   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1044   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1045   PetscFunctionReturn(PETSC_SUCCESS);
1046 }
1047 
1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1049 {
1050   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1051 
1052   PetscFunctionBegin;
1053   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1058 {
1059   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1060   VecScatter  Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   /* do nondiagonal part */
1076   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1077   /* do local part */
1078   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1079   /* add partial results together */
1080   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1081   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
/*
   Test whether Bmat equals the transpose of Amat to tolerance tol, setting *f.

   Cheap test first: each rank compares its own diagonal block and the
   results are AND-reduced; only if every rank passes are the off-diagonal
   parts gathered (via MatCreateSubMatrices) and compared.
*/
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* uniprocessor case: the diagonal block is the whole matrix */
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* NOTE(review): notme is sized with N but the second fill loop runs up to M;
     these agree only for square matrices — confirm behavior for rectangular input */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  /* notme = all global rows/cols outside this rank's ownership range */
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  /* NOTE(review): uses MPI_COMM_SELF rather than PETSC_COMM_SELF — presumably equivalent here; verify against project convention */
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* A(Me, Notme) must equal B(Notme, Me)^T */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1124 
1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1126 {
1127   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1128 
1129   PetscFunctionBegin;
1130   /* do nondiagonal part */
1131   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1132   /* do local part */
1133   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1134   /* add partial results together */
1135   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1136   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1137   PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139 
1140 /*
1141   This only works correctly for square matrices where the subblock A->A is the
1142    diagonal block
1143 */
1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1150   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1151   PetscCall(MatGetDiagonal(a->A, v));
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(MatScale(a->A, aa));
1161   PetscCall(MatScale(a->B, aa));
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1166 {
1167   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1168   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1169   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1170   const PetscInt    *garray = aij->garray;
1171   const PetscScalar *aa, *ba;
1172   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1173   PetscInt64         nz, hnz;
1174   PetscInt          *rowlens;
1175   PetscInt          *colidxs;
1176   PetscScalar       *matvals;
1177   PetscMPIInt        rank;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscViewerSetUp(viewer));
1181 
1182   M  = mat->rmap->N;
1183   N  = mat->cmap->N;
1184   m  = mat->rmap->n;
1185   rs = mat->rmap->rstart;
1186   cs = mat->cmap->rstart;
1187   nz = A->nz + B->nz;
1188 
1189   /* write matrix header */
1190   header[0] = MAT_FILE_CLASSID;
1191   header[1] = M;
1192   header[2] = N;
1193   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1194   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1195   if (rank == 0) {
1196     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1197     else header[3] = (PetscInt)hnz;
1198   }
1199   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1200 
1201   /* fill in and store row lengths  */
1202   PetscCall(PetscMalloc1(m, &rowlens));
1203   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1204   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1205   PetscCall(PetscFree(rowlens));
1206 
1207   /* fill in and store column indices */
1208   PetscCall(PetscMalloc1(nz, &colidxs));
1209   for (cnt = 0, i = 0; i < m; i++) {
1210     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1211       if (garray[B->j[jb]] > cs) break;
1212       colidxs[cnt++] = garray[B->j[jb]];
1213     }
1214     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1215     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1216   }
1217   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1218   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1219   PetscCall(PetscFree(colidxs));
1220 
1221   /* fill in and store nonzero values */
1222   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1223   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1224   PetscCall(PetscMalloc1(nz, &matvals));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       matvals[cnt++] = ba[jb];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1231     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1232   }
1233   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1235   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1236   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1237   PetscCall(PetscFree(matvals));
1238 
1239   /* write block size option to the viewer's .info file */
1240   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 #include <petscdraw.h>
/*
   View an MPIAIJ matrix on an ASCII, draw, binary, or socket viewer.

   Summary ASCII formats (load balance, info, info-detail, factor-info) and
   parallel binary output are handled directly and return early; every other
   case falls through to the tail, which gathers the entire matrix onto rank
   0 as a submatrix and views it there.
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* gather per-rank nonzero counts and print min/avg/max */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    /* NOTE(review): format was already fetched above; this second call is redundant but harmless */
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary of local/diagonal/off-diagonal nonzeros */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    /* NOTE(review): unreachable — this else-branch runs only when iascii is false, so the condition can never hold */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/cols; other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1369 
1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1371 {
1372   PetscBool iascii, isdraw, issocket, isbinary;
1373 
1374   PetscFunctionBegin;
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1377   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1378   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1379   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1380   PetscFunctionReturn(PETSC_SUCCESS);
1381 }
1382 
/* SOR/Gauss-Seidel relaxation for MPIAIJ. Only the on-process ("local") sweep
   variants and the Eisenstat trick are supported; the off-process coupling
   (block B) is folded into the right-hand side before each local sweep, so a
   true parallel SOR across process boundaries is not implemented. */
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector holding bb - B*x (modified rhs for the local sweep) */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* delegate directly to the sequential diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* a work vector is needed unless this is a single sweep with a zero initial guess
     (note: ~flag & SOR_ZERO_INITIAL_GUESS tests that the bit is NOT set) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration ignores x, so no rhs update is required */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather the ghost values of x needed by the off-diagonal block */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1; /* result of the forward half-sweep, added into xx at the end */

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward half-sweep with zero initial guess */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the diagonal; it is reused across calls */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any (zero-pivot style) factorization error detected in the local sweeps */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1479 
/* Builds B = permuted A, where rowp/colp give, for each local row/column of A,
   the global index it should move to. PetscSF reductions are used to invert the
   permutations (find where this process's rows/columns land) and to broadcast
   the destinations of the ghost columns. */
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros per (unpermuted) local row, classified
     by whether the destination row and column land on the same process */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* push the per-row counts to the processes that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never assigned in this function, so this destroy
     appears unreachable — possibly a leftover from an earlier version */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1585 
1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1587 {
1588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1589 
1590   PetscFunctionBegin;
1591   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1592   if (ghosts) *ghosts = aij->garray;
1593   PetscFunctionReturn(PETSC_SUCCESS);
1594 }
1595 
1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1597 {
1598   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1599   Mat            A = mat->A, B = mat->B;
1600   PetscLogDouble isend[5], irecv[5];
1601 
1602   PetscFunctionBegin;
1603   info->block_size = 1.0;
1604   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1605 
1606   isend[0] = info->nz_used;
1607   isend[1] = info->nz_allocated;
1608   isend[2] = info->nz_unneeded;
1609   isend[3] = info->memory;
1610   isend[4] = info->mallocs;
1611 
1612   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1613 
1614   isend[0] += info->nz_used;
1615   isend[1] += info->nz_allocated;
1616   isend[2] += info->nz_unneeded;
1617   isend[3] += info->memory;
1618   isend[4] += info->mallocs;
1619   if (flag == MAT_LOCAL) {
1620     info->nz_used      = isend[0];
1621     info->nz_allocated = isend[1];
1622     info->nz_unneeded  = isend[2];
1623     info->memory       = isend[3];
1624     info->mallocs      = isend[4];
1625   } else if (flag == MAT_GLOBAL_MAX) {
1626     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1627 
1628     info->nz_used      = irecv[0];
1629     info->nz_allocated = irecv[1];
1630     info->nz_unneeded  = irecv[2];
1631     info->memory       = irecv[3];
1632     info->mallocs      = irecv[4];
1633   } else if (flag == MAT_GLOBAL_SUM) {
1634     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1635 
1636     info->nz_used      = irecv[0];
1637     info->nz_allocated = irecv[1];
1638     info->nz_unneeded  = irecv[2];
1639     info->memory       = irecv[3];
1640     info->mallocs      = irecv[4];
1641   }
1642   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1643   info->fill_ratio_needed = 0;
1644   info->factor_mallocs    = 0;
1645   PetscFunctionReturn(PETSC_SUCCESS);
1646 }
1647 
1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1649 {
1650   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1651 
1652   PetscFunctionBegin;
1653   switch (op) {
1654   case MAT_NEW_NONZERO_LOCATIONS:
1655   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1656   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1657   case MAT_KEEP_NONZERO_PATTERN:
1658   case MAT_NEW_NONZERO_LOCATION_ERR:
1659   case MAT_USE_INODES:
1660   case MAT_IGNORE_ZERO_ENTRIES:
1661   case MAT_FORM_EXPLICIT_TRANSPOSE:
1662     MatCheckPreallocated(A, 1);
1663     PetscCall(MatSetOption(a->A, op, flg));
1664     PetscCall(MatSetOption(a->B, op, flg));
1665     break;
1666   case MAT_ROW_ORIENTED:
1667     MatCheckPreallocated(A, 1);
1668     a->roworiented = flg;
1669 
1670     PetscCall(MatSetOption(a->A, op, flg));
1671     PetscCall(MatSetOption(a->B, op, flg));
1672     break;
1673   case MAT_FORCE_DIAGONAL_ENTRIES:
1674   case MAT_SORTED_FULL:
1675     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1676     break;
1677   case MAT_IGNORE_OFF_PROC_ENTRIES:
1678     a->donotstash = flg;
1679     break;
1680   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1681   case MAT_SPD:
1682   case MAT_SYMMETRIC:
1683   case MAT_STRUCTURALLY_SYMMETRIC:
1684   case MAT_HERMITIAN:
1685   case MAT_SYMMETRY_ETERNAL:
1686   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1687   case MAT_SPD_ETERNAL:
1688     /* if the diagonal matrix is square it inherits some of the properties above */
1689     break;
1690   case MAT_SUBMAT_SINGLEIS:
1691     A->submat_singleis = flg;
1692     break;
1693   case MAT_STRUCTURE_ONLY:
1694     /* The option is handled directly by MatSetOption() */
1695     break;
1696   default:
1697     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1698   }
1699   PetscFunctionReturn(PETSC_SUCCESS);
1700 }
1701 
/* Returns one locally-owned row of the parallel matrix as a single sorted
   list of global column indices and values, by merging the row of the
   diagonal block A with the row of the off-diagonal block B (whose local
   column indices are mapped to global ones through garray). Must be paired
   with MatRestoreRow_MPIAIJ(). */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time since the work buffers are shared */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    /* buffers are freed in MatDestroy_MPIAIJ */
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* only request from A/B the outputs (values and/or indices) the caller wants;
     column indices of B are still needed for merging when values are requested */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies before the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already computed in the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1785 
1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1787 {
1788   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1789 
1790   PetscFunctionBegin;
1791   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1792   aij->getrowactive = PETSC_FALSE;
1793   PetscFunctionReturn(PETSC_SUCCESS);
1794 }
1795 
/* Computes the Frobenius, 1- (max column sum), or infinity- (max row sum) norm
   of an MPIAIJ matrix by combining the diagonal (A) and off-diagonal (B)
   blocks and reducing across the communicator. The 2-norm is not supported. */
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: the whole matrix is the diagonal block */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, then sqrt of the global sum */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate per-global-column absolute sums in tmp, reduce, then take the max */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        /* A's column indices are local to the diagonal block; shift by cstart */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* B's column indices are compressed; garray maps them to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        /* row sum = |A row j| + |B row j|; rows are local so no communication until the end */
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1873 
/* Transposes an MPIAIJ matrix. The diagonal block is transposed locally (fast
   path); the off-diagonal block's entries are scattered with MatSetValues()
   since their transposed locations live on other processes. Supports
   MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and in-place (*matout == A). */
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute preallocation counts for the transposed matrix */
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B has A's column layout as rows and row layout as columns */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate B's compressed column indices to global indices via garray */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    /* insert row i of a->B as column (row) of the transpose: note the swapped
       row/column argument order in MatSetValues */
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's contents with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1967 
1968 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1969 {
1970   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1971   Mat         a = aij->A, b = aij->B;
1972   PetscInt    s1, s2, s3;
1973 
1974   PetscFunctionBegin;
1975   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1976   if (rr) {
1977     PetscCall(VecGetLocalSize(rr, &s1));
1978     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1979     /* Overlap communication with computation. */
1980     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1981   }
1982   if (ll) {
1983     PetscCall(VecGetLocalSize(ll, &s1));
1984     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1985     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1986   }
1987   /* scale  the diagonal block */
1988   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1989 
1990   if (rr) {
1991     /* Do a scatter end and then right scale the off-diagonal block */
1992     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1993     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1994   }
1995   PetscFunctionReturn(PETSC_SUCCESS);
1996 }
1997 
1998 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1999 {
2000   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2001 
2002   PetscFunctionBegin;
2003   PetscCall(MatSetUnfactored(a->A));
2004   PetscFunctionReturn(PETSC_SUCCESS);
2005 }
2006 
2007 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2008 {
2009   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2010   Mat         a, b, c, d;
2011   PetscBool   flg;
2012 
2013   PetscFunctionBegin;
2014   a = matA->A;
2015   b = matA->B;
2016   c = matB->A;
2017   d = matB->B;
2018 
2019   PetscCall(MatEqual(a, c, &flg));
2020   if (flg) PetscCall(MatEqual(b, d, &flg));
2021   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2022   PetscFunctionReturn(PETSC_SUCCESS);
2023 }
2024 
2025 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2026 {
2027   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2028   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2029 
2030   PetscFunctionBegin;
2031   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2032   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2033     /* because of the column compression in the off-processor part of the matrix a->B,
2034        the number of columns in a->B and b->B may be different, hence we cannot call
2035        the MatCopy() directly on the two parts. If need be, we can provide a more
2036        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2037        then copying the submatrices */
2038     PetscCall(MatCopy_Basic(A, B, str));
2039   } else {
2040     PetscCall(MatCopy(a->A, b->A, str));
2041     PetscCall(MatCopy(a->B, b->B, str));
2042   }
2043   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2044   PetscFunctionReturn(PETSC_SUCCESS);
2045 }
2046 
2047 /*
2048    Computes the number of nonzeros per row needed for preallocation when X and Y
2049    have different nonzero structure.
2050 */
2051 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2052 {
2053   PetscInt i, j, k, nzx, nzy;
2054 
2055   PetscFunctionBegin;
2056   /* Set the number of nonzeros in the new matrix */
2057   for (i = 0; i < m; i++) {
2058     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2059     nzx    = xi[i + 1] - xi[i];
2060     nzy    = yi[i + 1] - yi[i];
2061     nnz[i] = 0;
2062     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2063       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2064       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2065       nnz[i]++;
2066     }
2067     for (; k < nzy; k++) nnz[i]++;
2068   }
2069   PetscFunctionReturn(PETSC_SUCCESS);
2070 }
2071 
2072 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2073 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2074 {
2075   PetscInt    m = Y->rmap->N;
2076   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2077   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2078 
2079   PetscFunctionBegin;
2080   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2081   PetscFunctionReturn(PETSC_SUCCESS);
2082 }
2083 
2084 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2085 {
2086   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2087 
2088   PetscFunctionBegin;
2089   if (str == SAME_NONZERO_PATTERN) {
2090     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2091     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2092   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2093     PetscCall(MatAXPY_Basic(Y, a, X, str));
2094   } else {
2095     Mat       B;
2096     PetscInt *nnz_d, *nnz_o;
2097 
2098     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2099     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2100     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2101     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2102     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2103     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2104     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2105     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2106     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2107     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2108     PetscCall(MatHeaderMerge(Y, &B));
2109     PetscCall(PetscFree(nnz_d));
2110     PetscCall(PetscFree(nnz_o));
2111   }
2112   PetscFunctionReturn(PETSC_SUCCESS);
2113 }
2114 
2115 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2116 
2117 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2118 {
2119   PetscFunctionBegin;
2120   if (PetscDefined(USE_COMPLEX)) {
2121     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2122 
2123     PetscCall(MatConjugate_SeqAIJ(aij->A));
2124     PetscCall(MatConjugate_SeqAIJ(aij->B));
2125   }
2126   PetscFunctionReturn(PETSC_SUCCESS);
2127 }
2128 
2129 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2130 {
2131   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2132 
2133   PetscFunctionBegin;
2134   PetscCall(MatRealPart(a->A));
2135   PetscCall(MatRealPart(a->B));
2136   PetscFunctionReturn(PETSC_SUCCESS);
2137 }
2138 
2139 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2140 {
2141   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2142 
2143   PetscFunctionBegin;
2144   PetscCall(MatImaginaryPart(a->A));
2145   PetscCall(MatImaginaryPart(a->B));
2146   PetscFunctionReturn(PETSC_SUCCESS);
2147 }
2148 
2149 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2150 {
2151   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2152   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2153   PetscScalar       *va, *vv;
2154   Vec                vB, vA;
2155   const PetscScalar *vb;
2156 
2157   PetscFunctionBegin;
2158   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2159   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2160 
2161   PetscCall(VecGetArrayWrite(vA, &va));
2162   if (idx) {
2163     for (i = 0; i < m; i++) {
2164       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2165     }
2166   }
2167 
2168   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2169   PetscCall(PetscMalloc1(m, &idxb));
2170   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2171 
2172   PetscCall(VecGetArrayWrite(v, &vv));
2173   PetscCall(VecGetArrayRead(vB, &vb));
2174   for (i = 0; i < m; i++) {
2175     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2176       vv[i] = vb[i];
2177       if (idx) idx[i] = a->garray[idxb[i]];
2178     } else {
2179       vv[i] = va[i];
2180       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2181     }
2182   }
2183   PetscCall(VecRestoreArrayWrite(vA, &vv));
2184   PetscCall(VecRestoreArrayWrite(vA, &va));
2185   PetscCall(VecRestoreArrayRead(vB, &vb));
2186   PetscCall(PetscFree(idxb));
2187   PetscCall(VecDestroy(&vA));
2188   PetscCall(VecDestroy(&vB));
2189   PetscFunctionReturn(PETSC_SUCCESS);
2190 }
2191 
2192 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2193 {
2194   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2195   PetscInt    m = A->rmap->n;
2196   Vec         vB, vA;
2197 
2198   PetscFunctionBegin;
2199   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2200   PetscCall(MatGetRowSumAbs(a->A, vA));
2201   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2202   PetscCall(MatGetRowSumAbs(a->B, vB));
2203   PetscCall(VecAXPY(vA, 1.0, vB));
2204   PetscCall(VecDestroy(&vB));
2205   PetscCall(VecCopy(vA, v));
2206   PetscCall(VecDestroy(&vA));
2207   PetscFunctionReturn(PETSC_SUCCESS);
2208 }
2209 
/* For each locally owned row, compute the entry of smallest absolute value over the whole
   parallel row, treating global columns with no stored entry in the off-diagonal block as
   implicit 0.0 values.  v receives the values; optional idx[] receives the global column of
   each minimum (ties resolved to the smaller global column). */
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local, so mat->A holds the complete rows: compute straight into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: report the implicit 0.0 with no column index.
       NOTE(review): entries stored in mat->B are ignored here -- presumably any such entry
       cannot beat |0.0| as a minimum, but idx[r] = -1 discards its column; confirm intent */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  /* Minimum over the diagonal block; diagIdx holds local column indices */
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      /* Every non-local column is stored: seed the candidate with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the minimum in magnitude is at most the implicit 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap: B columns within a row are sorted, so the first position
         where the global column jumps ahead of the running count marks an implicit 0.0 */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* Empty B row: the first off-diagonal column is the first hole */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          /* First hole follows the last stored column; skip over the local column range */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of this B row for anything smaller in magnitude than the candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal candidates; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2325 
/* For each locally owned row, compute the minimum entry (ordered by real part) over the whole
   parallel row, treating global columns with no stored entry in the off-diagonal block as
   implicit 0.0 values.  v receives the values; optional idx[] receives the global column of
   each minimum (ties resolved to the smaller global column). */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local, so mat->A holds the complete rows: compute straight into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: report PETSC_MAX_REAL with no column index.
       NOTE(review): entries stored in mat->B are ignored in this branch -- confirm that B is
       empty (or irrelevant) whenever a rank owns zero columns */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  /* Minimum over the diagonal block; diagIdx holds local column indices */
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      /* Every non-local column is stored: seed the candidate with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the minimum is at most the implicit 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap: B columns within a row are sorted, so the first position
         where the global column jumps ahead of the running count marks an implicit 0.0 */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* Empty B row: the first off-diagonal column is the first hole */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          /* First hole follows the last stored column; skip over the local column range */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of this B row for anything smaller than the candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal candidates; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2441 
/* For each locally owned row, compute the maximum entry (ordered by real part) over the whole
   parallel row, treating global columns with no stored entry in the off-diagonal block as
   implicit 0.0 values.  v receives the values; optional idx[] receives the global column of
   each maximum (ties resolved to the smaller global column). */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local, so mat->A holds the complete rows: compute straight into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: report PETSC_MIN_REAL with no column index.
       NOTE(review): entries stored in mat->B are ignored in this branch -- confirm that B is
       empty (or irrelevant) whenever a rank owns zero columns */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  /* Maximum over the diagonal block; diagIdx holds local column indices */
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      /* Every non-local column is stored: seed the candidate with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap: B columns within a row are sorted, so the first position
         where the global column jumps ahead of the running count marks an implicit 0.0 */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* Empty B row: the first off-diagonal column is the first hole */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          /* First hole follows the last stored column; skip over the local column range */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of this B row for anything larger than the candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal candidates; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2557 
2558 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2559 {
2560   Mat *dummy;
2561 
2562   PetscFunctionBegin;
2563   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2564   *newmat = *dummy;
2565   PetscCall(PetscFree(dummy));
2566   PetscFunctionReturn(PETSC_SUCCESS);
2567 }
2568 
2569 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2570 {
2571   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2572 
2573   PetscFunctionBegin;
2574   PetscCall(MatInvertBlockDiagonal(a->A, values));
2575   A->factorerrortype = a->A->factorerrortype;
2576   PetscFunctionReturn(PETSC_SUCCESS);
2577 }
2578 
2579 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2580 {
2581   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2582 
2583   PetscFunctionBegin;
2584   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2585   PetscCall(MatSetRandom(aij->A, rctx));
2586   if (x->assembled) {
2587     PetscCall(MatSetRandom(aij->B, rctx));
2588   } else {
2589     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2590   }
2591   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2592   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2593   PetscFunctionReturn(PETSC_SUCCESS);
2594 }
2595 
2596 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2597 {
2598   PetscFunctionBegin;
2599   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2600   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2601   PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603 
2604 /*@
2605   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2606 
2607   Not Collective
2608 
2609   Input Parameter:
2610 . A - the matrix
2611 
2612   Output Parameter:
2613 . nz - the number of nonzeros
2614 
2615   Level: advanced
2616 
2617 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2618 @*/
2619 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2620 {
2621   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2622   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2623   PetscBool   isaij;
2624 
2625   PetscFunctionBegin;
2626   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2627   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2628   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2629   PetscFunctionReturn(PETSC_SUCCESS);
2630 }
2631 
2632 /*@
2633   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2634 
2635   Collective
2636 
2637   Input Parameters:
2638 + A  - the matrix
2639 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2640 
2641   Level: advanced
2642 
2643 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2644 @*/
2645 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2646 {
2647   PetscFunctionBegin;
2648   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2649   PetscFunctionReturn(PETSC_SUCCESS);
2650 }
2651 
2652 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2653 {
2654   PetscBool sc = PETSC_FALSE, flg;
2655 
2656   PetscFunctionBegin;
2657   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2658   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2659   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2660   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2661   PetscOptionsHeadEnd();
2662   PetscFunctionReturn(PETSC_SUCCESS);
2663 }
2664 
2665 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2666 {
2667   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2668   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2669 
2670   PetscFunctionBegin;
2671   if (!Y->preallocated) {
2672     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2673   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2674     PetscInt nonew = aij->nonew;
2675     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2676     aij->nonew = nonew;
2677   }
2678   PetscCall(MatShift_Basic(Y, a));
2679   PetscFunctionReturn(PETSC_SUCCESS);
2680 }
2681 
2682 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2683 {
2684   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2685 
2686   PetscFunctionBegin;
2687   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2688   PetscCall(MatMissingDiagonal(a->A, missing, d));
2689   if (d) {
2690     PetscInt rstart;
2691     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2692     *d += rstart;
2693   }
2694   PetscFunctionReturn(PETSC_SUCCESS);
2695 }
2696 
2697 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2698 {
2699   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2700 
2701   PetscFunctionBegin;
2702   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2703   PetscFunctionReturn(PETSC_SUCCESS);
2704 }
2705 
2706 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2707 {
2708   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2709 
2710   PetscFunctionBegin;
2711   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2712   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2713   PetscFunctionReturn(PETSC_SUCCESS);
2714 }
2715 
/* Virtual-function table installed on MATMPIAIJ matrices.  The numeric comments are the
   slot indices in struct _MatOps; a NULL slot means no MPIAIJ-specific implementation is
   registered for that operation here. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ};
2869 
2870 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2871 {
2872   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2873 
2874   PetscFunctionBegin;
2875   PetscCall(MatStoreValues(aij->A));
2876   PetscCall(MatStoreValues(aij->B));
2877   PetscFunctionReturn(PETSC_SUCCESS);
2878 }
2879 
2880 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2881 {
2882   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2883 
2884   PetscFunctionBegin;
2885   PetscCall(MatRetrieveValues(aij->A));
2886   PetscCall(MatRetrieveValues(aij->B));
2887   PetscFunctionReturn(PETSC_SUCCESS);
2888 }
2889 
/* Preallocate the storage of an MPIAIJ matrix: (re)create the sequential
   diagonal block b->A (preallocated with d_nz/d_nnz) and off-diagonal block
   b->B (preallocated with o_nz/o_nnz), discarding any previously built
   column map, garray, local vector, and scatter context. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* If hash-table-based MatSetValues() was active (MatSetUp() without preallocation),
     restore the saved function table before switching to real preallocated storage */
  if (B->hash_active) {
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* Throw away communication structures tied to the old nonzero pattern */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* Recreate the off-diagonal block. On a single process it has zero columns;
     in parallel it is created with the full global column count (NOTE(review):
     presumably condensed to the actual off-process columns at assembly — confirm) */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  /* Recreate the diagonal block with the local row/column sizes of B */
  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2937 
/* Reset an MPIAIJ matrix to its just-preallocated state: drop the
   communication structures built for the previous nonzero pattern and reset
   the two sequential blocks, so values can be inserted afresh. */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* These are rebuilt during the next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Reset the sequential diagonal (A) and off-diagonal (B) blocks */
  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2963 
/* Duplicate an MPIAIJ matrix, copying its parallel layout, the sequential
   diagonal/off-diagonal blocks (values copied or not per cpvalues), and the
   communication structures (colmap, garray, lvec, Mvctx) when they exist. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance MatGetRow() scratch space is not copied; it is allocated lazily */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* source was in hash-based insertion mode; just set up the duplicate the same way */
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    /* colmap: global-to-local column translation for the off-diagonal block */
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    /* garray: global column indices of the off-diagonal block's columns */
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray)); /* len + 1 so the allocation is valid even when len == 0 */
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  /* carry over composed functions (e.g. conversion routines) */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3022 
3023 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3024 {
3025   PetscBool isbinary, ishdf5;
3026 
3027   PetscFunctionBegin;
3028   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3029   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3030   /* force binary viewer to load .info file if it has not yet done so */
3031   PetscCall(PetscViewerSetUp(viewer));
3032   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3033   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3034   if (isbinary) {
3035     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3036   } else if (ishdf5) {
3037 #if defined(PETSC_HAVE_HDF5)
3038     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3039 #else
3040     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3041 #endif
3042   } else {
3043     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3044   }
3045   PetscFunctionReturn(PETSC_SUCCESS);
3046 }
3047 
/* Load an MPIAIJ matrix from a PETSc binary viewer: read the 4-entry header
   (classid, M, N, nz), fix up layouts, then read row lengths, column indices,
   and values, and hand them to MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* negative nz signals a non-AIJ on-disk layout (e.g. dense) that this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices (prefix sum turns lengths into CSR row offsets) */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* nz == PETSC_MAX_INT apparently marks an unknown/overflowed total; skip the
     consistency check in that case (NOTE(review): marker semantics inferred — confirm with the writer) */
  if (nz != PETSC_MAX_INT) {
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3100 
3101 /* Not scalable because of ISAllGather() unless getting all columns. */
3102 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3103 {
3104   IS          iscol_local;
3105   PetscBool   isstride;
3106   PetscMPIInt lisstride = 0, gisstride;
3107 
3108   PetscFunctionBegin;
3109   /* check if we are grabbing all columns*/
3110   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3111 
3112   if (isstride) {
3113     PetscInt start, len, mstart, mlen;
3114     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3115     PetscCall(ISGetLocalSize(iscol, &len));
3116     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3117     if (mstart == start && mlen - mstart == len) lisstride = 1;
3118   }
3119 
3120   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3121   if (gisstride) {
3122     PetscInt N;
3123     PetscCall(MatGetSize(mat, NULL, &N));
3124     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3125     PetscCall(ISSetIdentity(iscol_local));
3126     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3127   } else {
3128     PetscInt cbs;
3129     PetscCall(ISGetBlockSize(iscol, &cbs));
3130     PetscCall(ISAllGather(iscol, &iscol_local));
3131     PetscCall(ISSetBlockSize(iscol_local, cbs));
3132   }
3133 
3134   *isseq = iscol_local;
3135   PetscFunctionReturn(PETSC_SUCCESS);
3136 }
3137 
3138 /*
3139  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3140  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3141 
3142  Input Parameters:
3143 +   mat - matrix
.   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
           i.e., mat->rstart <= isrow[i] < mat->rend
3146 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3147            i.e., mat->cstart <= iscol[i] < mat->cend
3148 
3149  Output Parameters:
3150 +   isrow_d - sequential row index set for retrieving mat->A
.   iscol_d - sequential column index set for retrieving mat->A
3152 .   iscol_o - sequential column index set for retrieving mat->B
3153 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3154  */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of local iscol sizes gives this
     rank's offset into the (virtual) concatenated iscol */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  /* Mark the selected columns in x (with their global index) and record in cmap
     their position in the submatrix's column numbering */
  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d: local column indices into mat->A, block size copied from iscol */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: local row indices into mat->A, block size copied from isrow */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries left at -1 were not selected by any rank */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* ownership of cmap1 transfers to the caller via *garray; caller must PetscFree() it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3251 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed on it during the
       MAT_INITIAL_MATRIX call below) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; this takes ownership of Asub and destroys Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* Keep only the iscol_o entries whose global column survives in the condensed
         B; both subgarray and garray are scanned in tandem (both sorted ascending —
         NOTE(review): sortedness is assumed here, enforced by the SETERRQ below) */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3344 
/* Extract a parallel submatrix mat[isrow, iscol]. Dispatches to one of three
   implementations depending on whether isrow/iscol follow the same processor
   distribution as mat, falling back to the non-scalable ISAllGather() path. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* On reuse, the composed objects on *newmat record which path created it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices inside [start, end) implies same distribution */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the decision must be unanimous across all processes (MPI_LAND) */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* not sorted: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* compose iscol_local on the result so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3444 
/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* Translate B's compact local column indices to global indices via garray, in place */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Transfer ownership of the shared arrays from B to Bnew: clearing B's free
     flags keeps MatDestroy(&B) from freeing them ... */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  /* ... and setting Bnew's flags makes MatDestroy(&Bnew) free them later */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3539 
3540 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3541 
/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - Extracts a parallel submatrix of an MPIAIJ matrix
  for the case where the submatrix keeps the same row distribution as the original matrix,
  so no matrix rows need to move between MPI processes.

  isrow       - rows to extract (same row distribution as mat)
  iscol       - requested columns (parallel IS)
  iscol_local - sequential IS holding ALL requested column indices; the implementation
                below requires it to be sorted, and it may contain duplicate indices
  call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
  newmat      - the resulting parallel submatrix

  On MAT_INITIAL_MATRIX the intermediate objects (sequential submatrix Msub, the
  sub-column IS and the column-map IS) are composed onto *newmat under the keys
  "SubMatrix", "SubIScol" and "Subcmap" so a later MAT_REUSE_MATRIX call can reuse them.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the objects composed onto *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    /* Refresh the numerical values of the sequential submatrix in place */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* the special case applies only if every process requested all columns */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      /* identity column map: submatrix columns coincide with iscol_local positions */
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0; /* number of requested columns present on this process (diagonal block or garray) */
      k     = 0; /* scan position in the sorted off-diagonal column map garray */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            /* advance through garray until it reaches or passes j (both sequences are sorted) */
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      /* iscol_sub: the requested columns actually present on this process; ownership of idx transfers to the IS */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      /* iscmap: for each entry of iscol_sub, its column position within the submatrix */
      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* default block-row split of the Ncols columns across size processes */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this process's [rstart,rend) column ownership */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* cmap translates Msub's local column index to its submatrix column */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens)); /* frees olens too */

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* translate Msub's local column indices to submatrix columns before insertion */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub)); /* the compose holds its own reference */

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3751 
3752 /*
3753     Not great since it makes two copies of the submatrix, first an SeqAIJ
3754   in local and then by concatenating the local matrices the end result.
3755   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3756 
3757   This requires a sequential iscol with all indices.
3758 */
/*
  Extracts the rows isrow and columns iscol of mat into a new parallel matrix *newmat.
  csize is the requested local column size of the result, or PETSC_DECIDE.
  The sequential intermediate submatrix Mreuse is composed onto *newmat under the key
  "SubMatrix" so that MAT_REUSE_MATRIX calls can refill it in place.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the special case applies only if every process requested all columns */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* Recover the sequential submatrix stashed by a previous MAT_INITIAL_MATRIX call and refill it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* default block-row split of the n columns across size processes */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of the local column counts yields this process's [rstart,rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens)); /* frees olens too */
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj; /* column indices of local row i */
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa; /* values of local row i */
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse)); /* the compose holds its own reference */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3885 
3886 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3887 {
3888   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3889   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3890   const PetscInt *JJ;
3891   PetscBool       nooffprocentries;
3892   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3893 
3894   PetscFunctionBegin;
3895   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3896 
3897   PetscCall(PetscLayoutSetUp(B->rmap));
3898   PetscCall(PetscLayoutSetUp(B->cmap));
3899   m      = B->rmap->n;
3900   cstart = B->cmap->rstart;
3901   cend   = B->cmap->rend;
3902   rstart = B->rmap->rstart;
3903 
3904   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3905 
3906   if (PetscDefined(USE_DEBUG)) {
3907     for (i = 0; i < m; i++) {
3908       nnz = Ii[i + 1] - Ii[i];
3909       JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
3910       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3911       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3912       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3913     }
3914   }
3915 
3916   for (i = 0; i < m; i++) {
3917     nnz     = Ii[i + 1] - Ii[i];
3918     JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
3919     nnz_max = PetscMax(nnz_max, nnz);
3920     d       = 0;
3921     for (j = 0; j < nnz; j++) {
3922       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3923     }
3924     d_nnz[i] = d;
3925     o_nnz[i] = nnz - d;
3926   }
3927   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3928   PetscCall(PetscFree2(d_nnz, o_nnz));
3929 
3930   for (i = 0; i < m; i++) {
3931     ii = i + rstart;
3932     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
3933   }
3934   nooffprocentries    = B->nooffprocentries;
3935   B->nooffprocentries = PETSC_TRUE;
3936   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3937   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3938   B->nooffprocentries = nooffprocentries;
3939 
3940   /* count number of entries below block diagonal */
3941   PetscCall(PetscFree(Aij->ld));
3942   PetscCall(PetscCalloc1(m, &ld));
3943   Aij->ld = ld;
3944   for (i = 0; i < m; i++) {
3945     nnz = Ii[i + 1] - Ii[i];
3946     j   = 0;
3947     while (j < nnz && J[j] < cstart) j++;
3948     ld[i] = j;
3949     if (J) J += nnz;
3950   }
3951 
3952   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3953   PetscFunctionReturn(PETSC_SUCCESS);
3954 }
3955 
3956 /*@
3957   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3958   (the default parallel PETSc format).
3959 
3960   Collective
3961 
3962   Input Parameters:
3963 + B - the matrix
3964 . i - the indices into `j` for the start of each local row (indices start with zero)
3965 . j - the column indices for each local row (indices start with zero)
3966 - v - optional values in the matrix
3967 
3968   Level: developer
3969 
3970   Notes:
3971   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3972   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3973   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3974 
3975   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3976 
3977   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3978 
3979   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3980 
3981   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3982   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3983 
3984   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
3986   as shown
3987 .vb
3988         1 0 0
3989         2 0 3     P0
3990        -------
3991         4 5 6     P1
3992 
3993      Process0 [P0] rows_owned=[0,1]
3994         i =  {0,1,3}  [size = nrow+1  = 2+1]
3995         j =  {0,0,2}  [size = 3]
3996         v =  {1,2,3}  [size = 3]
3997 
3998      Process1 [P1] rows_owned=[2]
3999         i =  {0,3}    [size = nrow+1  = 1+1]
4000         j =  {0,1,2}  [size = 3]
4001         v =  {4,5,6}  [size = 3]
4002 .ve
4003 
4004 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4005           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4006 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod() does nothing if the matrix type provides no such method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4013 
4014 /*@C
4015   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4016   (the default parallel PETSc format).  For good matrix assembly performance
4017   the user should preallocate the matrix storage by setting the parameters
4018   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4019 
4020   Collective
4021 
4022   Input Parameters:
4023 + B     - the matrix
4024 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4025            (same value is used for all local rows)
4026 . d_nnz - array containing the number of nonzeros in the various rows of the
4027            DIAGONAL portion of the local submatrix (possibly different for each row)
4028            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4029            The size of this array is equal to the number of local rows, i.e 'm'.
4030            For matrices that will be factored, you must leave room for (and set)
4031            the diagonal entry even if it is zero.
4032 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4033            submatrix (same value is used for all local rows).
4034 - o_nnz - array containing the number of nonzeros in the various rows of the
4035            OFF-DIAGONAL portion of the local submatrix (possibly different for
4036            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4037            structure. The size of this array is equal to the number
4038            of local rows, i.e 'm'.
4039 
4040   Example Usage:
4041   Consider the following 8x8 matrix with 34 non-zero values, that is
4042   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4043   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4044   as follows
4045 
4046 .vb
4047             1  2  0  |  0  3  0  |  0  4
4048     Proc0   0  5  6  |  7  0  0  |  8  0
4049             9  0 10  | 11  0  0  | 12  0
4050     -------------------------------------
4051            13  0 14  | 15 16 17  |  0  0
4052     Proc1   0 18  0  | 19 20 21  |  0  0
4053             0  0  0  | 22 23  0  | 24  0
4054     -------------------------------------
4055     Proc2  25 26 27  |  0  0 28  | 29  0
4056            30  0  0  | 31 32 33  |  0 34
4057 .ve
4058 
4059   This can be represented as a collection of submatrices as
4060 .vb
4061       A B C
4062       D E F
4063       G H I
4064 .ve
4065 
4066   Where the submatrices A,B,C are owned by proc0, D,E,F are
4067   owned by proc1, G,H,I are owned by proc2.
4068 
4069   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4070   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4071   The 'M','N' parameters are 8,8, and have the same values on all procs.
4072 
4073   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4074   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4075   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4076   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4077   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4079 
4080   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4081   allocated for every row of the local diagonal submatrix, and `o_nz`
4082   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per
  local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4085   In this case, the values of `d_nz`, `o_nz` are
4086 .vb
4087      proc0  dnz = 2, o_nz = 2
4088      proc1  dnz = 3, o_nz = 2
4089      proc2  dnz = 1, o_nz = 4
4090 .ve
4091   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4092   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
4094   34 values.
4095 
4096   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4097   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4098   In the above case the values for `d_nnz`, `o_nnz` are
4099 .vb
4100      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4101      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4102      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4103 .ve
4104   Here the space allocated is sum of all the above values i.e 34, and
4105   hence pre-allocation is perfect.
4106 
4107   Level: intermediate
4108 
4109   Notes:
4110   If the *_nnz parameter is given then the *_nz parameter is ignored
4111 
4112   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4113   storage.  The stored row and column indices begin with zero.
4114   See [Sparse Matrices](sec_matsparse) for details.
4115 
4116   The parallel matrix is partitioned such that the first m0 rows belong to
4117   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4118   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4119 
4120   The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
4122   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4123   first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
4125   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4126   common case of a square matrix, the row and column ranges are the same and
4127   the DIAGONAL part is also square. The remaining portion of the local
4128   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4129 
4130   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4131 
4132   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4133   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4134   You can also run with the option `-info` and look for messages with the string
4135   malloc in them to see if additional memory allocation was needed.
4136 
4137 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4138           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4139 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1); /* the matrix type must already be set for the dispatch below to find a method */
  /* dispatch to the type-specific implementation; PetscTryMethod() is a no-op for types without one */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4148 
4149 /*@
4150   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4151   CSR format for the local rows.
4152 
4153   Collective
4154 
4155   Input Parameters:
4156 + comm - MPI communicator
4157 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4158 . n    - This value should be the same as the local size used in creating the
4159          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4160          calculated if `N` is given) For square matrices n is almost always `m`.
4161 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4162 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4163 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4164 . j    - global column indices
4165 - a    - optional matrix values
4166 
4167   Output Parameter:
4168 . mat - the matrix
4169 
4170   Level: intermediate
4171 
4172   Notes:
4173   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4174   thus you CANNOT change the matrix entries by changing the values of a[] after you have
4175   called this routine. Use `MatCreateMPIAIJWithSplitArray()` to avoid needing to copy the arrays.
4176 
4177   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4178 
4179   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4180 
4181   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4182   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4183 
4184   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
4186   as shown
4187 .vb
4188         1 0 0
4189         2 0 3     P0
4190        -------
4191         4 5 6     P1
4192 
4193      Process0 [P0] rows_owned=[0,1]
4194         i =  {0,1,3}  [size = nrow+1  = 2+1]
4195         j =  {0,0,2}  [size = 3]
4196         v =  {1,2,3}  [size = 3]
4197 
4198      Process1 [P1] rows_owned=[2]
4199         i =  {0,3}    [size = nrow+1  = 1+1]
4200         j =  {0,1,2}  [size = 3]
4201         v =  {4,5,6}  [size = 3]
4202 .ve
4203 
4204 .seealso: [](ch_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4205           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4206 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* when i is non-NULL its first entry must be 0 (0-based CSR row offsets) */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  /* create the MPIAIJ matrix, then preallocate and fill it from the CSR arrays in one shot */
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4219 
4220 /*@
4221   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4222   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4223   from `MatCreateMPIAIJWithArrays()`
4224 
4225   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4226 
4227   Collective
4228 
4229   Input Parameters:
4230 + mat - the matrix
4231 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4232 . n   - This value should be the same as the local size used in creating the
4233        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4234        calculated if N is given) For square matrices n is almost always m.
4235 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4236 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4237 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4238 . J   - column indices
4239 - v   - matrix values
4240 
4241   Level: deprecated
4242 
4243 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4244           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4245 @*/
4246 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4247 {
4248   PetscInt        nnz, i;
4249   PetscBool       nooffprocentries;
4250   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4251   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4252   PetscScalar    *ad, *ao;
4253   PetscInt        ldi, Iii, md;
4254   const PetscInt *Adi = Ad->i;
4255   PetscInt       *ld  = Aij->ld;
4256 
4257   PetscFunctionBegin;
4258   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4259   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4260   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4261   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4262 
4263   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4264   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4265 
4266   for (i = 0; i < m; i++) {
4267     if (PetscDefined(USE_DEBUG)) {
4268       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4269         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4270         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4271       }
4272     }
4273     nnz = Ii[i + 1] - Ii[i];
4274     Iii = Ii[i];
4275     ldi = ld[i];
4276     md  = Adi[i + 1] - Adi[i];
4277     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4278     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4279     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4280     ad += md;
4281     ao += nnz - md;
4282   }
4283   nooffprocentries      = mat->nooffprocentries;
4284   mat->nooffprocentries = PETSC_TRUE;
4285   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4286   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4287   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4288   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4289   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4290   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4291   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4292   mat->nooffprocentries = nooffprocentries;
4293   PetscFunctionReturn(PETSC_SUCCESS);
4294 }
4295 
4296 /*@
4297   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4298 
4299   Collective
4300 
4301   Input Parameters:
4302 + mat - the matrix
4303 - v   - matrix values, stored by row
4304 
4305   Level: intermediate
4306 
4307   Notes:
4308   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4309 
4310   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4311 
4312 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4313           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4314 @*/
4315 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4316 {
4317   PetscInt        nnz, i, m;
4318   PetscBool       nooffprocentries;
4319   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4320   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4321   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4322   PetscScalar    *ad, *ao;
4323   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4324   PetscInt        ldi, Iii, md;
4325   PetscInt       *ld = Aij->ld;
4326 
4327   PetscFunctionBegin;
4328   m = mat->rmap->n;
4329 
4330   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4331   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4332   Iii = 0;
4333   for (i = 0; i < m; i++) {
4334     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4335     ldi = ld[i];
4336     md  = Adi[i + 1] - Adi[i];
4337     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4338     ad += md;
4339     if (ao) {
4340       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4341       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4342       ao += nnz - md;
4343     }
4344     Iii += nnz;
4345   }
4346   nooffprocentries      = mat->nooffprocentries;
4347   mat->nooffprocentries = PETSC_TRUE;
4348   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4349   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4350   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4351   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4352   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4353   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4354   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4355   mat->nooffprocentries = nooffprocentries;
4356   PetscFunctionReturn(PETSC_SUCCESS);
4357 }
4358 
4359 /*@C
4360   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4361   (the default parallel PETSc format).  For good matrix assembly performance
4362   the user should preallocate the matrix storage by setting the parameters
4363   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4364 
4365   Collective
4366 
4367   Input Parameters:
4368 + comm  - MPI communicator
4369 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4370            This value should be the same as the local size used in creating the
4371            y vector for the matrix-vector product y = Ax.
4372 . n     - This value should be the same as the local size used in creating the
4373        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4374        calculated if N is given) For square matrices n is almost always m.
4375 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4376 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4377 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4378            (same value is used for all local rows)
4379 . d_nnz - array containing the number of nonzeros in the various rows of the
4380            DIAGONAL portion of the local submatrix (possibly different for each row)
4381            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4382            The size of this array is equal to the number of local rows, i.e 'm'.
4383 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4384            submatrix (same value is used for all local rows).
4385 - o_nnz - array containing the number of nonzeros in the various rows of the
4386            OFF-DIAGONAL portion of the local submatrix (possibly different for
4387            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4388            structure. The size of this array is equal to the number
4389            of local rows, i.e 'm'.
4390 
4391   Output Parameter:
4392 . A - the matrix
4393 
4394   Options Database Keys:
4395 + -mat_no_inode                     - Do not use inodes
4396 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4397 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4398         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4399         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4400 
4401   Level: intermediate
4402 
4403   Notes:
4404   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4405   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4406   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4407 
4408   If the *_nnz parameter is given then the *_nz parameter is ignored
4409 
4410   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4411   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4412   storage requirements for this matrix.
4413 
4414   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4415   processor than it must be used on all processors that share the object for
4416   that argument.
4417 
4418   The user MUST specify either the local or global matrix dimensions
4419   (possibly both).
4420 
4421   The parallel matrix is partitioned across processors such that the
4422   first m0 rows belong to process 0, the next m1 rows belong to
4423   process 1, the next m2 rows belong to process 2 etc.. where
4424   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4425   values corresponding to [m x N] submatrix.
4426 
4427   The columns are logically partitioned with the n0 columns belonging
4428   to 0th partition, the next n1 columns belonging to the next
4429   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4430 
4431   The DIAGONAL portion of the local submatrix on any given processor
4432   is the submatrix corresponding to the rows and columns m,n
4433   corresponding to the given processor. i.e diagonal matrix on
4434   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4435   etc. The remaining portion of the local submatrix [m x (N-n)]
4436   constitute the OFF-DIAGONAL portion. The example below better
4437   illustrates this concept.
4438 
4439   For a square global matrix we define each processor's diagonal portion
4440   to be its local rows and the corresponding columns (a square submatrix);
4441   each processor's off-diagonal portion encompasses the remainder of the
4442   local matrix (a rectangular submatrix).
4443 
4444   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4445 
4446   When calling this routine with a single process communicator, a matrix of
4447   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4448   type of communicator, use the construction mechanism
4449 .vb
4450   MatCreate(..., &A);
4451   MatSetType(A, MATMPIAIJ);
4452   MatSetSizes(A, m, n, M, N);
4453   MatMPIAIJSetPreallocation(A, ...);
4454 .ve
4455 
4456   By default, this format uses inodes (identical nodes) when possible.
4457   We search for consecutive rows with the same nonzero structure, thereby
4458   reusing matrix information to achieve increased efficiency.
4459 
4460   Example Usage:
4461   Consider the following 8x8 matrix with 34 non-zero values, that is
4462   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4463   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4464   as follows
4465 
4466 .vb
4467             1  2  0  |  0  3  0  |  0  4
4468     Proc0   0  5  6  |  7  0  0  |  8  0
4469             9  0 10  | 11  0  0  | 12  0
4470     -------------------------------------
4471            13  0 14  | 15 16 17  |  0  0
4472     Proc1   0 18  0  | 19 20 21  |  0  0
4473             0  0  0  | 22 23  0  | 24  0
4474     -------------------------------------
4475     Proc2  25 26 27  |  0  0 28  | 29  0
4476            30  0  0  | 31 32 33  |  0 34
4477 .ve
4478 
4479   This can be represented as a collection of submatrices as
4480 
4481 .vb
4482       A B C
4483       D E F
4484       G H I
4485 .ve
4486 
4487   Where the submatrices A,B,C are owned by proc0, D,E,F are
4488   owned by proc1, G,H,I are owned by proc2.
4489 
4490   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4491   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4492   The 'M','N' parameters are 8,8, and have the same values on all procs.
4493 
4494   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4495   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4496   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4497   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4498   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4500 
4501   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4502   allocated for every row of the local diagonal submatrix, and `o_nz`
4503   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over the local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4506   In this case, the values of `d_nz`,`o_nz` are
4507 .vb
4508      proc0  dnz = 2, o_nz = 2
4509      proc1  dnz = 3, o_nz = 2
4510      proc2  dnz = 1, o_nz = 4
4511 .ve
4512   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4513   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
4515   34 values.
4516 
4517   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4518   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4519   In the above case the values for d_nnz,o_nnz are
4520 .vb
4521      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4522      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4523      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4524 .ve
4525   Here the space allocated is sum of all the above values i.e 34, and
4526   hence pre-allocation is perfect.
4527 
4528 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4529           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4530 @*/
4531 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4532 {
4533   PetscMPIInt size;
4534 
4535   PetscFunctionBegin;
4536   PetscCall(MatCreate(comm, A));
4537   PetscCall(MatSetSizes(*A, m, n, M, N));
4538   PetscCallMPI(MPI_Comm_size(comm, &size));
4539   if (size > 1) {
4540     PetscCall(MatSetType(*A, MATMPIAIJ));
4541     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4542   } else {
4543     PetscCall(MatSetType(*A, MATSEQAIJ));
4544     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4545   }
4546   PetscFunctionReturn(PETSC_SUCCESS);
4547 }
4548 
4549 /*MC
4550     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4551 
4552     Synopsis:
4553     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4554 
4555     Not Collective
4556 
4557     Input Parameter:
4558 .   A - the `MATMPIAIJ` matrix
4559 
4560     Output Parameters:
4561 +   Ad - the diagonal portion of the matrix
4562 .   Ao - the off-diagonal portion of the matrix
4563 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4564 -   ierr - error code
4565 
4566      Level: advanced
4567 
4568     Note:
4569     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4570 
4571 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4572 M*/
4573 
4574 /*MC
4575     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4576 
4577     Synopsis:
4578     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4579 
4580     Not Collective
4581 
4582     Input Parameters:
4583 +   A - the `MATMPIAIJ` matrix
4584 .   Ad - the diagonal portion of the matrix
4585 .   Ao - the off-diagonal portion of the matrix
4586 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4587 -   ierr - error code
4588 
4589      Level: advanced
4590 
4591 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4592 M*/
4593 
4594 /*@C
4595   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4596 
4597   Not Collective
4598 
4599   Input Parameter:
4600 . A - The `MATMPIAIJ` matrix
4601 
4602   Output Parameters:
4603 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4604 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4605 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4606 
4607   Level: intermediate
4608 
4609   Note:
4610   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4611   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is
4612   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4613   local column numbers to global column numbers in the original matrix.
4614 
4615   Fortran Notes:
4616   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4617 
4618 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4619 @*/
4620 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4621 {
4622   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4623   PetscBool   flg;
4624 
4625   PetscFunctionBegin;
4626   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4627   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4628   if (Ad) *Ad = a->A;
4629   if (Ao) *Ao = a->B;
4630   if (colmap) *colmap = a->garray;
4631   PetscFunctionReturn(PETSC_SUCCESS);
4632 }
4633 
/* Stacks each rank's sequential matrix inmat (in rank order, via the MPI_Scan row offset)
   into one parallel matrix *outmat on comm; n is the local column count (or PETSC_DECIDE),
   and scall selects creating *outmat (MAT_INITIAL_MATRIX) or refilling an existing one. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N)); /* inmat is sequential: these are its local (= global) sizes */
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* exclusive prefix sum of the local row counts gives this rank's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row to preallocate exactly */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* only one of the two preallocations takes effect, depending on the actual type of *outmat */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart; /* local row i of inmat becomes global row Ii of *outmat */
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4685 
/* Destructor for the Mat_Merge_SeqsToMPI context composed (as the "MatMergeSeqsToMPI"
   container) on matrices built by MatCreateMPIAIJSumSeqAIJSymbolic(). */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS); /* nothing was composed */
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are arrays of per-message pointers; entry 0 addresses the single
     underlying buffer (presumably allocated in one chunk by PetscPostIrecvInt()),
     so it must be freed before the pointer array itself */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4708 
4709 #include <../src/mat/utils/freespace.h>
4710 #include <petscbt.h>
4711 
/* Numeric companion of MatCreateMPIAIJSumSeqAIJSymbolic(): fills mpimat with the sum of all
   ranks' seqmat contributions. Each rank ships the value blocks of the rows owned by other
   ranks (using the communication pattern stored in the "MatMergeSeqsToMPI" container) and
   accumulates local plus received values row by row into mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge context built by the symbolic phase; it is required */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi; /* merged row offsets / column indices for the locally owned rows */
  bj     = merge->bj;
  buf_ri = merge->buf_ri; /* received i-structures (row numbers + offsets) from the symbolic phase */
  buf_rj = merge->buf_rj; /* received column indices from the symbolic phase */

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s; /* len_s[proc]: number of values this rank sends to proc */

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* the values of all rows destined to proc are contiguous in aa, starting at owners[proc] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i)); /* scratch row of values, large enough for any merged row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row number of local row i */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* sorted merge: walk j along the merged row, advancing nextaj on each column match */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) { /* message k contributed to this row */
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *nextai[k];
        aa     = abuf_r[k] + *nextai[k];
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] owns the single receive buffer allocated by PetscPostIrecvScalar() */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4830 
4831 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4832 {
4833   Mat                  B_mpi;
4834   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4835   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4836   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4837   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4838   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4839   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4840   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4841   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4842   MPI_Status          *status;
4843   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4844   PetscBT              lnkbt;
4845   Mat_Merge_SeqsToMPI *merge;
4846   PetscContainer       container;
4847 
4848   PetscFunctionBegin;
4849   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4850 
4851   /* make sure it is a PETSc comm */
4852   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4853   PetscCallMPI(MPI_Comm_size(comm, &size));
4854   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4855 
4856   PetscCall(PetscNew(&merge));
4857   PetscCall(PetscMalloc1(size, &status));
4858 
4859   /* determine row ownership */
4860   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4861   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4862   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4863   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4864   PetscCall(PetscLayoutSetUp(merge->rowmap));
4865   PetscCall(PetscMalloc1(size, &len_si));
4866   PetscCall(PetscMalloc1(size, &merge->len_s));
4867 
4868   m      = merge->rowmap->n;
4869   owners = merge->rowmap->range;
4870 
4871   /* determine the number of messages to send, their lengths */
4872   len_s = merge->len_s;
4873 
4874   len          = 0; /* length of buf_si[] */
4875   merge->nsend = 0;
4876   for (proc = 0; proc < size; proc++) {
4877     len_si[proc] = 0;
4878     if (proc == rank) {
4879       len_s[proc] = 0;
4880     } else {
4881       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4882       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4883     }
4884     if (len_s[proc]) {
4885       merge->nsend++;
4886       nrows = 0;
4887       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4888         if (ai[i + 1] > ai[i]) nrows++;
4889       }
4890       len_si[proc] = 2 * (nrows + 1);
4891       len += len_si[proc];
4892     }
4893   }
4894 
4895   /* determine the number and length of messages to receive for ij-structure */
4896   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4897   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4898 
4899   /* post the Irecv of j-structure */
4900   PetscCall(PetscCommGetNewTag(comm, &tagj));
4901   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4902 
4903   /* post the Isend of j-structure */
4904   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4905 
4906   for (proc = 0, k = 0; proc < size; proc++) {
4907     if (!len_s[proc]) continue;
4908     i = owners[proc];
4909     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4910     k++;
4911   }
4912 
4913   /* receives and sends of j-structure are complete */
4914   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4915   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4916 
4917   /* send and recv i-structure */
4918   PetscCall(PetscCommGetNewTag(comm, &tagi));
4919   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4920 
4921   PetscCall(PetscMalloc1(len + 1, &buf_s));
4922   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4923   for (proc = 0, k = 0; proc < size; proc++) {
4924     if (!len_s[proc]) continue;
4925     /* form outgoing message for i-structure:
4926          buf_si[0]:                 nrows to be sent
4927                [1:nrows]:           row index (global)
4928                [nrows+1:2*nrows+1]: i-structure index
4929     */
4930     nrows       = len_si[proc] / 2 - 1;
4931     buf_si_i    = buf_si + nrows + 1;
4932     buf_si[0]   = nrows;
4933     buf_si_i[0] = 0;
4934     nrows       = 0;
4935     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4936       anzi = ai[i + 1] - ai[i];
4937       if (anzi) {
4938         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4939         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4940         nrows++;
4941       }
4942     }
4943     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4944     k++;
4945     buf_si += len_si[proc];
4946   }
4947 
4948   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4949   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4950 
4951   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4952   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4953 
4954   PetscCall(PetscFree(len_si));
4955   PetscCall(PetscFree(len_ri));
4956   PetscCall(PetscFree(rj_waits));
4957   PetscCall(PetscFree2(si_waits, sj_waits));
4958   PetscCall(PetscFree(ri_waits));
4959   PetscCall(PetscFree(buf_s));
4960   PetscCall(PetscFree(status));
4961 
4962   /* compute a local seq matrix in each processor */
4963   /* allocate bi array and free space for accumulating nonzero column info */
4964   PetscCall(PetscMalloc1(m + 1, &bi));
4965   bi[0] = 0;
4966 
4967   /* create and initialize a linked list */
4968   nlnk = N + 1;
4969   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4970 
4971   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4972   len = ai[owners[rank + 1]] - ai[owners[rank]];
4973   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4974 
4975   current_space = free_space;
4976 
4977   /* determine symbolic info for each local row */
4978   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4979 
4980   for (k = 0; k < merge->nrecv; k++) {
4981     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4982     nrows       = *buf_ri_k[k];
4983     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4984     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4985   }
4986 
4987   MatPreallocateBegin(comm, m, n, dnz, onz);
4988   len = 0;
4989   for (i = 0; i < m; i++) {
4990     bnzi = 0;
4991     /* add local non-zero cols of this proc's seqmat into lnk */
4992     arow = owners[rank] + i;
4993     anzi = ai[arow + 1] - ai[arow];
4994     aj   = a->j + ai[arow];
4995     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4996     bnzi += nlnk;
4997     /* add received col data into lnk */
4998     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4999       if (i == *nextrow[k]) {            /* i-th row */
5000         anzi = *(nextai[k] + 1) - *nextai[k];
5001         aj   = buf_rj[k] + *nextai[k];
5002         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5003         bnzi += nlnk;
5004         nextrow[k]++;
5005         nextai[k]++;
5006       }
5007     }
5008     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5009 
5010     /* if free space is not available, make more free space */
5011     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5012     /* copy data into free space, then initialize lnk */
5013     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5014     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5015 
5016     current_space->array += bnzi;
5017     current_space->local_used += bnzi;
5018     current_space->local_remaining -= bnzi;
5019 
5020     bi[i + 1] = bi[i] + bnzi;
5021   }
5022 
5023   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5024 
5025   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5026   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5027   PetscCall(PetscLLDestroy(lnk, lnkbt));
5028 
5029   /* create symbolic parallel matrix B_mpi */
5030   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5031   PetscCall(MatCreate(comm, &B_mpi));
5032   if (n == PETSC_DECIDE) {
5033     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5034   } else {
5035     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5036   }
5037   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5038   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5039   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5040   MatPreallocateEnd(dnz, onz);
5041   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5042 
5043   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5044   B_mpi->assembled = PETSC_FALSE;
5045   merge->bi        = bi;
5046   merge->bj        = bj;
5047   merge->buf_ri    = buf_ri;
5048   merge->buf_rj    = buf_rj;
5049   merge->coi       = NULL;
5050   merge->coj       = NULL;
5051   merge->owners_co = NULL;
5052 
5053   PetscCall(PetscCommDestroy(&comm));
5054 
5055   /* attach the supporting struct to B_mpi for reuse */
5056   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5057   PetscCall(PetscContainerSetPointer(container, merge));
5058   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5059   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5060   PetscCall(PetscContainerDestroy(&container));
5061   *mpimat = B_mpi;
5062 
5063   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5064   PetscFunctionReturn(PETSC_SUCCESS);
5065 }
5066 
5067 /*@C
5068   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5069   matrices from each processor
5070 
5071   Collective
5072 
5073   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix
5076 . m      - number of local rows (or `PETSC_DECIDE`)
5077 . n      - number of local columns (or `PETSC_DECIDE`)
5078 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5079 
5080   Output Parameter:
5081 . mpimat - the parallel matrix generated
5082 
5083   Level: advanced
5084 
5085   Note:
5086   The dimensions of the sequential matrix in each processor MUST be the same.
  The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5088   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5089 
5090 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5091 @*/
5092 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5093 {
5094   PetscMPIInt size;
5095 
5096   PetscFunctionBegin;
5097   PetscCallMPI(MPI_Comm_size(comm, &size));
5098   if (size == 1) {
5099     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5100     if (scall == MAT_INITIAL_MATRIX) {
5101       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5102     } else {
5103       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5104     }
5105     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5106     PetscFunctionReturn(PETSC_SUCCESS);
5107   }
5108   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5109   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5110   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5111   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5112   PetscFunctionReturn(PETSC_SUCCESS);
5113 }
5114 
5115 /*@
5116   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5117 
5118   Not Collective
5119 
5120   Input Parameter:
5121 . A - the matrix
5122 
5123   Output Parameter:
5124 . A_loc - the local sequential matrix generated
5125 
5126   Level: developer
5127 
5128   Notes:
5129   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5130   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5131   `n` is the global column count obtained with `MatGetSize()`
5132 
5133   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5134 
5135   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5136 
5137   Destroy the matrix with `MatDestroy()`
5138 
5139 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5140 @*/
5141 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5142 {
5143   PetscBool mpi;
5144 
5145   PetscFunctionBegin;
5146   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5147   if (mpi) {
5148     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5149   } else {
5150     *A_loc = A;
5151     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5152   }
5153   PetscFunctionReturn(PETSC_SUCCESS);
5154 }
5155 
5156 /*@
5157   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5158 
5159   Not Collective
5160 
5161   Input Parameters:
5162 + A     - the matrix
5163 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5164 
5165   Output Parameter:
5166 . A_loc - the local sequential matrix generated
5167 
5168   Level: developer
5169 
5170   Notes:
5171   The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5173   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5174 
5175   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5176 
5177   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5178   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5179   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5180   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5181 
5182 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5183 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  /* Merge the diagonal (mpimat->A) and off-diagonal (mpimat->B) parts of the
     MPIAIJ matrix into one sequential AIJ matrix whose columns use GLOBAL
     indices, keeping each row's columns in ascending global order. */
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav; /* aa/ba are roving copies; aav/bav stay pristine for the restore calls */
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* Strbeginswith (rather than an exact type compare) also accepts derived types whose
     name starts with "mpiaij" */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* One rank: the diagonal block already holds the entire matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row i of the merged matrix has (nnz in diag row i) + (nnz in off-diag row i) entries */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal entries whose global column index is below cstart come first */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* then the diagonal portion (global columns cstart..cstart+n-1) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* finally the remaining off-diagonal entries (global column >= cstart + n) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Pattern is unchanged; only copy values in the same low/diag/high column order */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal entries below cstart */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* remaining off-diagonal entries */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5288 
5289 /*@
5290   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5291   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5292 
5293   Not Collective
5294 
5295   Input Parameters:
5296 + A     - the matrix
5297 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5298 
5299   Output Parameters:
5300 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5301 - A_loc - the local sequential matrix generated
5302 
5303   Level: developer
5304 
5305   Note:
5306   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5307   part, then those associated with the off-diagonal part (in its local ordering)
5308 
5309 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5310 @*/
5311 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5312 {
5313   Mat             Ao, Ad;
5314   const PetscInt *cmap;
5315   PetscMPIInt     size;
5316   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5317 
5318   PetscFunctionBegin;
5319   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5320   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5321   if (size == 1) {
5322     if (scall == MAT_INITIAL_MATRIX) {
5323       PetscCall(PetscObjectReference((PetscObject)Ad));
5324       *A_loc = Ad;
5325     } else if (scall == MAT_REUSE_MATRIX) {
5326       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5327     }
5328     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5329     PetscFunctionReturn(PETSC_SUCCESS);
5330   }
5331   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5332   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5333   if (f) {
5334     PetscCall((*f)(A, scall, glob, A_loc));
5335   } else {
5336     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5337     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5338     Mat_SeqAIJ        *c;
5339     PetscInt          *ai = a->i, *aj = a->j;
5340     PetscInt          *bi = b->i, *bj = b->j;
5341     PetscInt          *ci, *cj;
5342     const PetscScalar *aa, *ba;
5343     PetscScalar       *ca;
5344     PetscInt           i, j, am, dn, on;
5345 
5346     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5347     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5348     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5349     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5350     if (scall == MAT_INITIAL_MATRIX) {
5351       PetscInt k;
5352       PetscCall(PetscMalloc1(1 + am, &ci));
5353       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5354       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5355       ci[0] = 0;
5356       for (i = 0, k = 0; i < am; i++) {
5357         const PetscInt ncols_o = bi[i + 1] - bi[i];
5358         const PetscInt ncols_d = ai[i + 1] - ai[i];
5359         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5360         /* diagonal portion of A */
5361         for (j = 0; j < ncols_d; j++, k++) {
5362           cj[k] = *aj++;
5363           ca[k] = *aa++;
5364         }
5365         /* off-diagonal portion of A */
5366         for (j = 0; j < ncols_o; j++, k++) {
5367           cj[k] = dn + *bj++;
5368           ca[k] = *ba++;
5369         }
5370       }
5371       /* put together the new matrix */
5372       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5373       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5374       /* Since these are PETSc arrays, change flags to free them as necessary. */
5375       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5376       c->free_a  = PETSC_TRUE;
5377       c->free_ij = PETSC_TRUE;
5378       c->nonew   = 0;
5379       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5380     } else if (scall == MAT_REUSE_MATRIX) {
5381       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5382       for (i = 0; i < am; i++) {
5383         const PetscInt ncols_d = ai[i + 1] - ai[i];
5384         const PetscInt ncols_o = bi[i + 1] - bi[i];
5385         /* diagonal portion of A */
5386         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5387         /* off-diagonal portion of A */
5388         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5389       }
5390       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5391     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5392     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5393     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5394     if (glob) {
5395       PetscInt cst, *gidx;
5396 
5397       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5398       PetscCall(PetscMalloc1(dn + on, &gidx));
5399       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5400       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5401       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5402     }
5403   }
5404   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5405   PetscFunctionReturn(PETSC_SUCCESS);
5406 }
5407 
5408 /*@C
5409   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5410 
5411   Not Collective
5412 
5413   Input Parameters:
5414 + A     - the matrix
5415 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5416 . row   - index set of rows to extract (or `NULL`)
5417 - col   - index set of columns to extract (or `NULL`)
5418 
5419   Output Parameter:
5420 . A_loc - the local sequential matrix generated
5421 
5422   Level: developer
5423 
5424 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5425 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  /* Extract a sequential submatrix of A's local rows restricted to the columns that are
     actually nonzero locally (or to user-supplied row/column index sets). */
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default rows: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default columns: merge the ghost columns (garray) with the owned columns,
       preserving ascending global order — garray entries below cstart first,
       then the owned block, then the remaining garray entries */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first garray entry at or beyond the owned column range */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5478 
5479 /*
5480  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5481  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5482  * on a global size.
5483  * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal nonzero count for row i */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal nonzero count for row i */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* running offsets giving the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    /* per-leaf-row nnz = diag nnz + off-diag nnz; ncol tracks the maximum */
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* SF node arrays: one entry per received nonzero, diagonal part */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal part */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* leaf indices into P_oth's aij arrays, diagonal part */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal part */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Temporarily shift the diag column indices to global values for the broadcast;
   * they are shifted back below */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is likewise mapped to global indices in place, broadcast, and mapped back below */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5652 
5653 /*
 * Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns of local A
5655  * This supports MPIAIJ and MAIJ
5656  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map; /* rows: global rows of P to extract; map: off-diag column of A -> local row of P_oth */
  PetscHMapI  hamp;      /* hash map used to deduplicate the keys a->garray[i]/dof */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf; /* star forests for updating values from P's diag (sf) and off-diag (osf) parts */
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that  a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dof > 1 (MAIJ) collapses dof consecutive columns onto one key */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step (relies on garray being sorted) */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices)); /* keys come out of the hash map in arbitrary order */
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    /* NOTE(review): both broadcasts target p_oth->a; presumably the two SFs address disjoint
       entries of that array so the overlapped begin/begin/end/end is safe -- confirm. */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5729 
5730 /*@C
  MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that equal the nonzero columns of local `A`
5732 
5733   Collective
5734 
5735   Input Parameters:
5736 + A     - the first matrix in `MATMPIAIJ` format
5737 . B     - the second matrix in `MATMPIAIJ` format
5738 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5739 
5740   Output Parameters:
5741 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5742 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5743 - B_seq - the sequential matrix generated
5744 
5745   Level: developer
5746 
5747 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5748 @*/
5749 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5750 {
5751   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5752   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5753   IS          isrowb, iscolb;
5754   Mat        *bseq = NULL;
5755 
5756   PetscFunctionBegin;
5757   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5758              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5759   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5760 
5761   if (scall == MAT_INITIAL_MATRIX) {
5762     start = A->cmap->rstart;
5763     cmap  = a->garray;
5764     nzA   = a->A->cmap->n;
5765     nzB   = a->B->cmap->n;
5766     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5767     ncols = 0;
5768     for (i = 0; i < nzB; i++) { /* row < local row index */
5769       if (cmap[i] < start) idx[ncols++] = cmap[i];
5770       else break;
5771     }
5772     imark = i;
5773     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5774     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5775     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5776     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5777   } else {
5778     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5779     isrowb = *rowb;
5780     iscolb = *colb;
5781     PetscCall(PetscMalloc1(1, &bseq));
5782     bseq[0] = *B_seq;
5783   }
5784   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5785   *B_seq = bseq[0];
5786   PetscCall(PetscFree(bseq));
5787   if (!rowb) {
5788     PetscCall(ISDestroy(&isrowb));
5789   } else {
5790     *rowb = isrowb;
5791   }
5792   if (!colb) {
5793     PetscCall(ISDestroy(&iscolb));
5794   } else {
5795     *colb = iscolb;
5796   }
5797   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5798   PetscFunctionReturn(PETSC_SUCCESS);
5799 }
5800 
5801 /*
5802     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5803     of the OFF-DIAGONAL portion of local A
5804 
5805     Collective
5806 
5807    Input Parameters:
5808 +    A,B - the matrices in `MATMPIAIJ` format
5809 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5810 
5811    Output Parameter:
5812 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5813 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5814 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5815 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5816 
5817     Developer Note:
5818     This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5820 
5821     Level: developer
5822 
5823 */
5824 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5825 {
5826   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5827   Mat_SeqAIJ        *b_oth;
5828   VecScatter         ctx;
5829   MPI_Comm           comm;
5830   const PetscMPIInt *rprocs, *sprocs;
5831   const PetscInt    *srow, *rstarts, *sstarts;
5832   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5833   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5834   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5835   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5836   PetscMPIInt        size, tag, rank, nreqs;
5837 
5838   PetscFunctionBegin;
5839   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5840   PetscCallMPI(MPI_Comm_size(comm, &size));
5841 
5842   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5843              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5844   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5845   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5846 
5847   if (size == 1) {
5848     startsj_s = NULL;
5849     bufa_ptr  = NULL;
5850     *B_oth    = NULL;
5851     PetscFunctionReturn(PETSC_SUCCESS);
5852   }
5853 
5854   ctx = a->Mvctx;
5855   tag = ((PetscObject)ctx)->tag;
5856 
5857   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5858   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5859   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5860   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5861   PetscCall(PetscMalloc1(nreqs, &reqs));
5862   rwaits = reqs;
5863   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5864 
5865   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5866   if (scall == MAT_INITIAL_MATRIX) {
5867     /* i-array */
5868     /*  post receives */
5869     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5870     for (i = 0; i < nrecvs; i++) {
5871       rowlen = rvalues + rstarts[i] * rbs;
5872       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5873       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5874     }
5875 
5876     /* pack the outgoing message */
5877     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5878 
5879     sstartsj[0] = 0;
5880     rstartsj[0] = 0;
5881     len         = 0; /* total length of j or a array to be sent */
5882     if (nsends) {
5883       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5884       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5885     }
5886     for (i = 0; i < nsends; i++) {
5887       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5888       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5889       for (j = 0; j < nrows; j++) {
5890         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5891         for (l = 0; l < sbs; l++) {
5892           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5893 
5894           rowlen[j * sbs + l] = ncols;
5895 
5896           len += ncols;
5897           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5898         }
5899         k++;
5900       }
5901       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5902 
5903       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5904     }
5905     /* recvs and sends of i-array are completed */
5906     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5907     PetscCall(PetscFree(svalues));
5908 
5909     /* allocate buffers for sending j and a arrays */
5910     PetscCall(PetscMalloc1(len + 1, &bufj));
5911     PetscCall(PetscMalloc1(len + 1, &bufa));
5912 
5913     /* create i-array of B_oth */
5914     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5915 
5916     b_othi[0] = 0;
5917     len       = 0; /* total length of j or a array to be received */
5918     k         = 0;
5919     for (i = 0; i < nrecvs; i++) {
5920       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5921       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5922       for (j = 0; j < nrows; j++) {
5923         b_othi[k + 1] = b_othi[k] + rowlen[j];
5924         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5925         k++;
5926       }
5927       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5928     }
5929     PetscCall(PetscFree(rvalues));
5930 
5931     /* allocate space for j and a arrays of B_oth */
5932     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5933     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5934 
5935     /* j-array */
5936     /*  post receives of j-array */
5937     for (i = 0; i < nrecvs; i++) {
5938       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5939       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5940     }
5941 
5942     /* pack the outgoing message j-array */
5943     if (nsends) k = sstarts[0];
5944     for (i = 0; i < nsends; i++) {
5945       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5946       bufJ  = bufj + sstartsj[i];
5947       for (j = 0; j < nrows; j++) {
5948         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5949         for (ll = 0; ll < sbs; ll++) {
5950           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5951           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5952           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5953         }
5954       }
5955       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5956     }
5957 
5958     /* recvs and sends of j-array are completed */
5959     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5960   } else if (scall == MAT_REUSE_MATRIX) {
5961     sstartsj = *startsj_s;
5962     rstartsj = *startsj_r;
5963     bufa     = *bufa_ptr;
5964     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5965     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5966   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5967 
5968   /* a-array */
5969   /*  post receives of a-array */
5970   for (i = 0; i < nrecvs; i++) {
5971     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5972     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5973   }
5974 
5975   /* pack the outgoing message a-array */
5976   if (nsends) k = sstarts[0];
5977   for (i = 0; i < nsends; i++) {
5978     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5979     bufA  = bufa + sstartsj[i];
5980     for (j = 0; j < nrows; j++) {
5981       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5982       for (ll = 0; ll < sbs; ll++) {
5983         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5984         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5985         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5986       }
5987     }
5988     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5989   }
5990   /* recvs and sends of a-array are completed */
5991   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5992   PetscCall(PetscFree(reqs));
5993 
5994   if (scall == MAT_INITIAL_MATRIX) {
5995     /* put together the new matrix */
5996     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5997 
5998     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5999     /* Since these are PETSc arrays, change flags to free them as necessary. */
6000     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6001     b_oth->free_a  = PETSC_TRUE;
6002     b_oth->free_ij = PETSC_TRUE;
6003     b_oth->nonew   = 0;
6004 
6005     PetscCall(PetscFree(bufj));
6006     if (!startsj_s || !bufa_ptr) {
6007       PetscCall(PetscFree2(sstartsj, rstartsj));
6008       PetscCall(PetscFree(bufa_ptr));
6009     } else {
6010       *startsj_s = sstartsj;
6011       *startsj_r = rstartsj;
6012       *bufa_ptr  = bufa;
6013     }
6014   } else if (scall == MAT_REUSE_MATRIX) {
6015     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6016   }
6017 
6018   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6019   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6020   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6021   PetscFunctionReturn(PETSC_SUCCESS);
6022 }
6023 
6024 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6025 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6027 #if defined(PETSC_HAVE_MKL_SPARSE)
6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6029 #endif
6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6032 #if defined(PETSC_HAVE_ELEMENTAL)
6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6034 #endif
6035 #if defined(PETSC_HAVE_SCALAPACK)
6036 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6037 #endif
6038 #if defined(PETSC_HAVE_HYPRE)
6039 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6040 #endif
6041 #if defined(PETSC_HAVE_CUDA)
6042 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6043 #endif
6044 #if defined(PETSC_HAVE_HIP)
6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6046 #endif
6047 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6049 #endif
6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6051 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6052 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6053 
6054 /*
6055     Computes (B'*A')' since computing B*A directly is untenable
6056 
6057                n                       p                          p
6058         [             ]       [             ]         [                 ]
6059       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6060         [             ]       [             ]         [                 ]
6061 
6062 */
6063 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6064 {
6065   Mat At, Bt, Ct;
6066 
6067   PetscFunctionBegin;
6068   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6069   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6070   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6071   PetscCall(MatDestroy(&At));
6072   PetscCall(MatDestroy(&Bt));
6073   PetscCall(MatTransposeSetPrecursor(Ct, C));
6074   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6075   PetscCall(MatDestroy(&Ct));
6076   PetscFunctionReturn(PETSC_SUCCESS);
6077 }
6078 
6079 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6080 {
6081   PetscBool cisdense;
6082 
6083   PetscFunctionBegin;
6084   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6085   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6086   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6087   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6088   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6089   PetscCall(MatSetUp(C));
6090 
6091   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6092   PetscFunctionReturn(PETSC_SUCCESS);
6093 }
6094 
/* Install the symbolic callbacks for the AB product of an MPIDense A and MPIAIJ B */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  /* A*B requires A's column ownership range to match B's row ownership range on every rank */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6107 
6108 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6109 {
6110   Mat_Product *product = C->product;
6111 
6112   PetscFunctionBegin;
6113   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6114   PetscFunctionReturn(PETSC_SUCCESS);
6115 }
6116 
6117 /*
6118    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6119 
6120   Input Parameters:
6121 
6122     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6123     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6124 
6125     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6126 
6127     For Set1, j1[] contains column indices of the nonzeros.
6128     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6130     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6131 
6132     Similar for Set2.
6133 
6134     This routine merges the two sets of nonzeros row by row and removes repeats.
6135 
6136   Output Parameters: (memory is allocated by the caller)
6137 
6138     i[],j[]: the CSR of the merged matrix, which has m rows.
6139     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6140     imap2[]: similar to imap1[], but for Set2.
6141     Note we order nonzeros row-by-row and from left to right.
6142 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Counters of unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge: b1/b2 walk over the (sorted, possibly repeated) entries of
       row r in Set1/Set2; each unique column advances its cursor past all of its repeats */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Column only present in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Column only present in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: row r of the merged matrix has i[r+1]-i[r] unique nonzeros */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6200 
6201 /*
6202   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6203 
6204   Input Parameters:
6205     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6206     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6207       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6208 
6209       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6210       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6211 
6212   Output Parameters:
6213     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6214     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6215       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6216       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6217 
6218     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6219       Atot: number of entries belonging to the diagonal block.
6220       Annz: number of unique nonzeros belonging to the diagonal block.
6221       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6222         repeats (i.e., same 'i,j' pair).
6223       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6224         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6225 
6226       Atot: number of entries belonging to the diagonal block
6227       Annz: number of unique nonzeros belonging to the diagonal block.
6228 
6229     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6230 
6231     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6232 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart; /* number of local rows */

  /* Skip negative rows (entries flagged to be ignored) */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
    for (p = k; p < s; p++) {
      /* Subtracting PETSC_MAX_INT makes diagonal-block columns negative, so one sort
         places them before all off-diagonal columns while preserving their order */
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      /* NOTE(review): the upper bound admits j[p] == mat->cmap->N, but valid global
         columns are [0, N) -- confirm whether '<=' is intentional */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); /* perm[] follows j[] so original entry positions survive the sort */
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* continue with the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* counters restart; first pass only sized the arrays */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* p - q = number of repeats of column 'col' */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6344 
6345 /*
6346   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6347 
6348   Input Parameters:
6349     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6350     nnz:  number of unique nonzeros in the merged matrix
6351     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6352     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6353 
6354   Output Parameter: (memory is allocated by the caller)
6355     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6356 
6357   Example:
6358     nnz1 = 4
6359     nnz  = 6
6360     imap = [1,3,4,5]
6361     jmap = [0,3,5,6,7]
6362    then,
6363     jmap_new = [0,0,3,3,5,6,7]
6364 */
6365 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6366 {
6367   PetscCount k, p;
6368 
6369   PetscFunctionBegin;
6370   jmap_new[0] = 0;
6371   p           = nnz;                /* p loops over jmap_new[] backwards */
6372   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6373     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6374   }
6375   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6376   PetscFunctionReturn(PETSC_SUCCESS);
6377 }
6378 
/* Destroy callback for the "__PETSc_MatCOOStruct_Host" container: releases the star forest
   and all permutation/jmap/imap arrays built by MatSetPreallocationCOO_MPIAIJ(), then the
   struct itself. `data` is the MatCOOStruct_MPIAIJ pointer stored in the container. */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  /* Arrays describing local (1-suffixed) and received-remote (2-suffixed) contributions */
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  /* sendbuf/recvbuf were allocated together with a single PetscMalloc2() */
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6400 
6401 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6402 {
6403   MPI_Comm             comm;
6404   PetscMPIInt          rank, size;
6405   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6406   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6407   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6408   PetscContainer       container;
6409   MatCOOStruct_MPIAIJ *coo;
6410 
6411   PetscFunctionBegin;
6412   PetscCall(PetscFree(mpiaij->garray));
6413   PetscCall(VecDestroy(&mpiaij->lvec));
6414 #if defined(PETSC_USE_CTABLE)
6415   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6416 #else
6417   PetscCall(PetscFree(mpiaij->colmap));
6418 #endif
6419   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6420   mat->assembled     = PETSC_FALSE;
6421   mat->was_assembled = PETSC_FALSE;
6422 
6423   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6424   PetscCallMPI(MPI_Comm_size(comm, &size));
6425   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6426   PetscCall(PetscLayoutSetUp(mat->rmap));
6427   PetscCall(PetscLayoutSetUp(mat->cmap));
6428   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6429   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6430   PetscCall(MatGetLocalSize(mat, &m, &n));
6431   PetscCall(MatGetSize(mat, &M, &N));
6432 
6433   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6434   /* entries come first, then local rows, then remote rows.                     */
6435   PetscCount n1 = coo_n, *perm1;
6436   PetscInt  *i1 = coo_i, *j1 = coo_j;
6437 
6438   PetscCall(PetscMalloc1(n1, &perm1));
6439   for (k = 0; k < n1; k++) perm1[k] = k;
6440 
6441   /* Manipulate indices so that entries with negative row or col indices will have smallest
6442      row indices, local entries will have greater but negative row indices, and remote entries
6443      will have positive row indices.
6444   */
6445   for (k = 0; k < n1; k++) {
6446     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6447     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6448     else {
6449       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6450       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6451     }
6452   }
6453 
6454   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6455   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6456 
6457   /* Advance k to the first entry we need to take care of */
6458   for (k = 0; k < n1; k++)
6459     if (i1[k] > PETSC_MIN_INT) break;
6460   PetscInt i1start = k;
6461 
6462   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6463   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6464 
6465   /*           Send remote rows to their owner                                  */
6466   /* Find which rows should be sent to which remote ranks*/
6467   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6468   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6469   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6470   const PetscInt *ranges;
6471   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6472 
6473   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6474   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6475   for (k = rem; k < n1;) {
6476     PetscMPIInt owner;
6477     PetscInt    firstRow, lastRow;
6478 
6479     /* Locate a row range */
6480     firstRow = i1[k]; /* first row of this owner */
6481     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6482     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6483 
6484     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6485     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6486 
6487     /* All entries in [k,p) belong to this remote owner */
6488     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6489       PetscMPIInt *sendto2;
6490       PetscInt    *nentries2;
6491       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6492 
6493       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6494       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6495       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6496       PetscCall(PetscFree2(sendto, nentries2));
6497       sendto   = sendto2;
6498       nentries = nentries2;
6499       maxNsend = maxNsend2;
6500     }
6501     sendto[nsend]   = owner;
6502     nentries[nsend] = p - k;
6503     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6504     nsend++;
6505     k = p;
6506   }
6507 
6508   /* Build 1st SF to know offsets on remote to send data */
6509   PetscSF      sf1;
6510   PetscInt     nroots = 1, nroots2 = 0;
6511   PetscInt     nleaves = nsend, nleaves2 = 0;
6512   PetscInt    *offsets;
6513   PetscSFNode *iremote;
6514 
6515   PetscCall(PetscSFCreate(comm, &sf1));
6516   PetscCall(PetscMalloc1(nsend, &iremote));
6517   PetscCall(PetscMalloc1(nsend, &offsets));
6518   for (k = 0; k < nsend; k++) {
6519     iremote[k].rank  = sendto[k];
6520     iremote[k].index = 0;
6521     nleaves2 += nentries[k];
6522     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6523   }
6524   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6525   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6526   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6527   PetscCall(PetscSFDestroy(&sf1));
6528   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6529 
6530   /* Build 2nd SF to send remote COOs to their owner */
6531   PetscSF sf2;
6532   nroots  = nroots2;
6533   nleaves = nleaves2;
6534   PetscCall(PetscSFCreate(comm, &sf2));
6535   PetscCall(PetscSFSetFromOptions(sf2));
6536   PetscCall(PetscMalloc1(nleaves, &iremote));
6537   p = 0;
6538   for (k = 0; k < nsend; k++) {
6539     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6540     for (q = 0; q < nentries[k]; q++, p++) {
6541       iremote[p].rank  = sendto[k];
6542       iremote[p].index = offsets[k] + q;
6543     }
6544   }
6545   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6546 
6547   /* Send the remote COOs to their owner */
6548   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6549   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6550   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6551   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6552   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6553   PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */
6554   PetscInt *j1prem = j1 ? j1 + rem : NULL;
6555   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6556   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6557   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6558   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6559 
6560   PetscCall(PetscFree(offsets));
6561   PetscCall(PetscFree2(sendto, nentries));
6562 
6563   /* Sort received COOs by row along with the permutation array     */
6564   for (k = 0; k < n2; k++) perm2[k] = k;
6565   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6566 
6567   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6568   PetscCount *Cperm1;
6569   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6570   PetscCount *perm1prem = perm1 ? perm1 + rem : NULL;
6571   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6572   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6573 
6574   /* Support for HYPRE matrices, kind of a hack.
6575      Swap min column with diagonal so that diagonal values will go first */
6576   PetscBool   hypre;
6577   const char *name;
6578   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6579   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6580   if (hypre) {
6581     PetscInt *minj;
6582     PetscBT   hasdiag;
6583 
6584     PetscCall(PetscBTCreate(m, &hasdiag));
6585     PetscCall(PetscMalloc1(m, &minj));
6586     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6587     for (k = i1start; k < rem; k++) {
6588       if (j1[k] < cstart || j1[k] >= cend) continue;
6589       const PetscInt rindex = i1[k] - rstart;
6590       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6591       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6592     }
6593     for (k = 0; k < n2; k++) {
6594       if (j2[k] < cstart || j2[k] >= cend) continue;
6595       const PetscInt rindex = i2[k] - rstart;
6596       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6597       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6598     }
6599     for (k = i1start; k < rem; k++) {
6600       const PetscInt rindex = i1[k] - rstart;
6601       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6602       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6603       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6604     }
6605     for (k = 0; k < n2; k++) {
6606       const PetscInt rindex = i2[k] - rstart;
6607       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6608       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6609       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6610     }
6611     PetscCall(PetscBTDestroy(&hasdiag));
6612     PetscCall(PetscFree(minj));
6613   }
6614 
6615   /* Split local COOs and received COOs into diag/offdiag portions */
6616   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6617   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6618   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6619   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6620   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6621   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6622 
6623   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6624   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6625   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6626   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6627 
6628   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6629   PetscInt *Ai, *Bi;
6630   PetscInt *Aj, *Bj;
6631 
6632   PetscCall(PetscMalloc1(m + 1, &Ai));
6633   PetscCall(PetscMalloc1(m + 1, &Bi));
6634   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6635   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6636 
6637   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6638   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6639   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6640   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6641   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6642 
6643   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6644   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6645 
6646   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6647   /* expect nonzeros in A/B most likely have local contributing entries        */
6648   PetscInt    Annz = Ai[m];
6649   PetscInt    Bnnz = Bi[m];
6650   PetscCount *Ajmap1_new, *Bjmap1_new;
6651 
6652   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6653   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6654 
6655   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6656   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6657 
6658   PetscCall(PetscFree(Aimap1));
6659   PetscCall(PetscFree(Ajmap1));
6660   PetscCall(PetscFree(Bimap1));
6661   PetscCall(PetscFree(Bjmap1));
6662   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6663   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6664   PetscCall(PetscFree(perm1));
6665   PetscCall(PetscFree3(i2, j2, perm2));
6666 
6667   Ajmap1 = Ajmap1_new;
6668   Bjmap1 = Bjmap1_new;
6669 
6670   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6671   if (Annz < Annz1 + Annz2) {
6672     PetscInt *Aj_new;
6673     PetscCall(PetscMalloc1(Annz, &Aj_new));
6674     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6675     PetscCall(PetscFree(Aj));
6676     Aj = Aj_new;
6677   }
6678 
6679   if (Bnnz < Bnnz1 + Bnnz2) {
6680     PetscInt *Bj_new;
6681     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6682     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6683     PetscCall(PetscFree(Bj));
6684     Bj = Bj_new;
6685   }
6686 
6687   /* Create new submatrices for on-process and off-process coupling                  */
6688   PetscScalar     *Aa, *Ba;
6689   MatType          rtype;
6690   Mat_SeqAIJ      *a, *b;
6691   PetscObjectState state;
6692   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6693   PetscCall(PetscCalloc1(Bnnz, &Ba));
6694   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6695   if (cstart) {
6696     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6697   }
6698 
6699   PetscCall(MatGetRootType_Private(mat, &rtype));
6700 
6701   MatSeqXAIJGetOptions_Private(mpiaij->A);
6702   PetscCall(MatDestroy(&mpiaij->A));
6703   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6704   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6705   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6706 
6707   MatSeqXAIJGetOptions_Private(mpiaij->B);
6708   PetscCall(MatDestroy(&mpiaij->B));
6709   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6710   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6711   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6712 
6713   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6714   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6715   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6716   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6717 
6718   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6719   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6720   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6721   a->free_a = b->free_a = PETSC_TRUE;
6722   a->free_ij = b->free_ij = PETSC_TRUE;
6723 
6724   /* conversion must happen AFTER multiply setup */
6725   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6726   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6727   PetscCall(VecDestroy(&mpiaij->lvec));
6728   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6729 
6730   // Put the COO struct in a container and then attach that to the matrix
6731   PetscCall(PetscMalloc1(1, &coo));
6732   coo->n       = coo_n;
6733   coo->sf      = sf2;
6734   coo->sendlen = nleaves;
6735   coo->recvlen = nroots;
6736   coo->Annz    = Annz;
6737   coo->Bnnz    = Bnnz;
6738   coo->Annz2   = Annz2;
6739   coo->Bnnz2   = Bnnz2;
6740   coo->Atot1   = Atot1;
6741   coo->Atot2   = Atot2;
6742   coo->Btot1   = Btot1;
6743   coo->Btot2   = Btot2;
6744   coo->Ajmap1  = Ajmap1;
6745   coo->Aperm1  = Aperm1;
6746   coo->Bjmap1  = Bjmap1;
6747   coo->Bperm1  = Bperm1;
6748   coo->Aimap2  = Aimap2;
6749   coo->Ajmap2  = Ajmap2;
6750   coo->Aperm2  = Aperm2;
6751   coo->Bimap2  = Bimap2;
6752   coo->Bjmap2  = Bjmap2;
6753   coo->Bperm2  = Bperm2;
6754   coo->Cperm1  = Cperm1;
6755   // Allocate in preallocation. If not used, it has zero cost on host
6756   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6757   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6758   PetscCall(PetscContainerSetPointer(container, coo));
6759   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6760   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6761   PetscCall(PetscContainerDestroy(&container));
6762   PetscFunctionReturn(PETSC_SUCCESS);
6763 }
6764 
6765 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6766 {
6767   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6768   Mat                  A = mpiaij->A, B = mpiaij->B;
6769   PetscScalar         *Aa, *Ba;
6770   PetscScalar         *sendbuf, *recvbuf;
6771   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6772   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6773   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6774   const PetscCount    *Cperm1;
6775   PetscContainer       container;
6776   MatCOOStruct_MPIAIJ *coo;
6777 
6778   PetscFunctionBegin;
6779   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6780   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6781   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6782   sendbuf = coo->sendbuf;
6783   recvbuf = coo->recvbuf;
6784   Ajmap1  = coo->Ajmap1;
6785   Ajmap2  = coo->Ajmap2;
6786   Aimap2  = coo->Aimap2;
6787   Bjmap1  = coo->Bjmap1;
6788   Bjmap2  = coo->Bjmap2;
6789   Bimap2  = coo->Bimap2;
6790   Aperm1  = coo->Aperm1;
6791   Aperm2  = coo->Aperm2;
6792   Bperm1  = coo->Bperm1;
6793   Bperm2  = coo->Bperm2;
6794   Cperm1  = coo->Cperm1;
6795 
6796   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6797   PetscCall(MatSeqAIJGetArray(B, &Ba));
6798 
6799   /* Pack entries to be sent to remote */
6800   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6801 
6802   /* Send remote entries to their owner and overlap the communication with local computation */
6803   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6804   /* Add local entries to A and B */
6805   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6806     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6807     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6808     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6809   }
6810   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6811     PetscScalar sum = 0.0;
6812     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6813     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6814   }
6815   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6816 
6817   /* Add received remote entries to A and B */
6818   for (PetscCount i = 0; i < coo->Annz2; i++) {
6819     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6820   }
6821   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6822     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6823   }
6824   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6825   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6826   PetscFunctionReturn(PETSC_SUCCESS);
6827 }
6828 
6829 /*MC
6830    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6831 
6832    Options Database Keys:
6833 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6834 
6835    Level: beginner
6836 
6837    Notes:
6838    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6839     in this case the values associated with the rows and columns one passes in are set to zero
6840     in the matrix
6841 
6842     `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
6843     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6844 
6845 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6846 M*/
/* Constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the function table,
   and registers the type-specific operations and conversions via PetscObjectComposeFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values;
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific operations queried by name elsewhere in PETSc */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other matrix types; device/back-end variants are compiled in conditionally */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  /* COO assembly interface (MatSetPreallocationCOO/MatSetValuesCOO) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6929 
6930 /*@C
6931   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6932   and "off-diagonal" part of the matrix in CSR format.
6933 
6934   Collective
6935 
6936   Input Parameters:
6937 + comm - MPI communicator
6938 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6939 . n    - This value should be the same as the local size used in creating the
6940          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6941          calculated if `N` is given) For square matrices `n` is almost always `m`.
6942 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6943 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6944 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6945 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6946 . a    - matrix values
6947 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6948 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6949 - oa   - matrix values
6950 
6951   Output Parameter:
6952 . mat - the matrix
6953 
6954   Level: advanced
6955 
6956   Notes:
6957   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6958   must free the arrays once the matrix has been destroyed and not before.
6959 
6960   The `i` and `j` indices are 0 based
6961 
6962   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6963 
6964   This sets local rows and cannot be used to set off-processor values.
6965 
6966   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6967   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6968   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6969   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6970   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6971   communication if it is known that only local entries will be set.
6972 
6973 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6974           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6975 @*/
6976 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6977 {
6978   Mat_MPIAIJ *maij;
6979 
6980   PetscFunctionBegin;
6981   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6982   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6983   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6984   PetscCall(MatCreate(comm, mat));
6985   PetscCall(MatSetSizes(*mat, m, n, M, N));
6986   PetscCall(MatSetType(*mat, MATMPIAIJ));
6987   maij = (Mat_MPIAIJ *)(*mat)->data;
6988 
6989   (*mat)->preallocated = PETSC_TRUE;
6990 
6991   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6992   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6993 
6994   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6995   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6996 
6997   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6998   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6999   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7000   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7001   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7002   PetscFunctionReturn(PETSC_SUCCESS);
7003 }
7004 
/* Scratch data attached to C->product by MatProductSymbolic_MPIAIJBACKEND(): the product
   is computed as a series of intermediate per-process (sequential) products whose entries
   are then assembled into C via COO insertion. Freed by MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i]; own[i+1]-own[i] is their count */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i]; off[i+1]-off[i] is their count */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory type of the COO buffers (host/CUDA/HIP/Kokkos), chosen from C's matrix type */

  /* customization */
  PetscBool abmerge;    /* for AB: merge P's diag and off-diag blocks before multiplying by A_diag */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
7035 
7036 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7037 {
7038   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7039   PetscInt             i;
7040 
7041   PetscFunctionBegin;
7042   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7043   PetscCall(PetscFree(mmdata->bufa));
7044   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7045   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7046   PetscCall(MatDestroy(&mmdata->P_oth));
7047   PetscCall(MatDestroy(&mmdata->Bloc));
7048   PetscCall(PetscSFDestroy(&mmdata->sf));
7049   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7050   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7051   PetscCall(PetscFree(mmdata->own[0]));
7052   PetscCall(PetscFree(mmdata->own));
7053   PetscCall(PetscFree(mmdata->off[0]));
7054   PetscCall(PetscFree(mmdata->off));
7055   PetscCall(PetscFree(mmdata));
7056   PetscFunctionReturn(PETSC_SUCCESS);
7057 }
7058 
7059 /* Copy selected n entries with indices in idx[] of A to v[].
7060    If idx is NULL, copy the whole data array of A to v[]
7061  */
7062 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7063 {
7064   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7065 
7066   PetscFunctionBegin;
7067   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7068   if (f) {
7069     PetscCall((*f)(A, n, idx, v));
7070   } else {
7071     const PetscScalar *vv;
7072 
7073     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7074     if (n && idx) {
7075       PetscScalar    *w  = v;
7076       const PetscInt *oi = idx;
7077       PetscInt        j;
7078 
7079       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7080     } else {
7081       PetscCall(PetscArraycpy(v, vv, n));
7082     }
7083     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7084   }
7085   PetscFunctionReturn(PETSC_SUCCESS);
7086 }
7087 
/* Numeric phase: rerun the numeric phase of each intermediate product, then scatter
   the resulting values into C through the COO machinery set up by the symbolic phase.
   coo_v receives on-process values (followed by values gathered from other ranks);
   coo_w is the MPI send buffer for values destined to other ranks. */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    /* refresh the communicated/merged inputs with the current values of A and B */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* reusesym only skips the first numeric update right after symbolic (see struct comment) */
  mmdata->reusesym = PETSC_FALSE;

  /* run the numeric phase of every intermediate (sequential) product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* pack each non-temporary product's values into coo_v (local) / coo_w (to be sent) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; /* # of entries of mp[i] going off-process */

    if (mmdata->mptmp[i]) continue; /* temporary products only feed later products */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; /* # of entries of mp[i] staying on-process */

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else { /* no off-process entries: copy mp[i]'s whole value array */
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* append values received from other ranks after the locally produced ones */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7132 
7133 /* Support for Pt * A, A * P, or Pt * A * P */
7134 #define MAX_NUMBER_INTERMEDIATE 4
7135 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7136 {
7137   Mat_Product           *product = C->product;
7138   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7139   Mat_MPIAIJ            *a, *p;
7140   MatMatMPIAIJBACKEND   *mmdata;
7141   ISLocalToGlobalMapping P_oth_l2g = NULL;
7142   IS                     glob      = NULL;
7143   const char            *prefix;
7144   char                   pprefix[256];
7145   const PetscInt        *globidx, *P_oth_idx;
7146   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7147   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7148   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7149                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7150                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7151   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7152 
7153   MatProductType ptype;
7154   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7155   PetscMPIInt    size;
7156 
7157   PetscFunctionBegin;
7158   MatCheckProduct(C, 1);
7159   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7160   ptype = product->type;
7161   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7162     ptype                                          = MATPRODUCT_AB;
7163     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7164   }
7165   switch (ptype) {
7166   case MATPRODUCT_AB:
7167     A          = product->A;
7168     P          = product->B;
7169     m          = A->rmap->n;
7170     n          = P->cmap->n;
7171     M          = A->rmap->N;
7172     N          = P->cmap->N;
7173     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7174     break;
7175   case MATPRODUCT_AtB:
7176     P          = product->A;
7177     A          = product->B;
7178     m          = P->cmap->n;
7179     n          = A->cmap->n;
7180     M          = P->cmap->N;
7181     N          = A->cmap->N;
7182     hasoffproc = PETSC_TRUE;
7183     break;
7184   case MATPRODUCT_PtAP:
7185     A          = product->A;
7186     P          = product->B;
7187     m          = P->cmap->n;
7188     n          = P->cmap->n;
7189     M          = P->cmap->N;
7190     N          = P->cmap->N;
7191     hasoffproc = PETSC_TRUE;
7192     break;
7193   default:
7194     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7195   }
7196   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7197   if (size == 1) hasoffproc = PETSC_FALSE;
7198 
7199   /* defaults */
7200   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7201     mp[i]    = NULL;
7202     mptmp[i] = PETSC_FALSE;
7203     rmapt[i] = -1;
7204     cmapt[i] = -1;
7205     rmapa[i] = NULL;
7206     cmapa[i] = NULL;
7207   }
7208 
7209   /* customization */
7210   PetscCall(PetscNew(&mmdata));
7211   mmdata->reusesym = product->api_user;
7212   if (ptype == MATPRODUCT_AB) {
7213     if (product->api_user) {
7214       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7215       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7216       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7217       PetscOptionsEnd();
7218     } else {
7219       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7220       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7221       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7222       PetscOptionsEnd();
7223     }
7224   } else if (ptype == MATPRODUCT_PtAP) {
7225     if (product->api_user) {
7226       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7227       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7228       PetscOptionsEnd();
7229     } else {
7230       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7231       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7232       PetscOptionsEnd();
7233     }
7234   }
7235   a = (Mat_MPIAIJ *)A->data;
7236   p = (Mat_MPIAIJ *)P->data;
7237   PetscCall(MatSetSizes(C, m, n, M, N));
7238   PetscCall(PetscLayoutSetUp(C->rmap));
7239   PetscCall(PetscLayoutSetUp(C->cmap));
7240   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7241   PetscCall(MatGetOptionsPrefix(C, &prefix));
7242 
7243   cp = 0;
7244   switch (ptype) {
7245   case MATPRODUCT_AB: /* A * P */
7246     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7247 
7248     /* A_diag * P_local (merged or not) */
7249     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7250       /* P is product->B */
7251       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7252       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7253       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7254       PetscCall(MatProductSetFill(mp[cp], product->fill));
7255       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7256       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7257       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7258       mp[cp]->product->api_user = product->api_user;
7259       PetscCall(MatProductSetFromOptions(mp[cp]));
7260       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7261       PetscCall(ISGetIndices(glob, &globidx));
7262       rmapt[cp] = 1;
7263       cmapt[cp] = 2;
7264       cmapa[cp] = globidx;
7265       mptmp[cp] = PETSC_FALSE;
7266       cp++;
7267     } else { /* A_diag * P_diag and A_diag * P_off */
7268       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7269       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7270       PetscCall(MatProductSetFill(mp[cp], product->fill));
7271       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7272       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7273       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7274       mp[cp]->product->api_user = product->api_user;
7275       PetscCall(MatProductSetFromOptions(mp[cp]));
7276       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7277       rmapt[cp] = 1;
7278       cmapt[cp] = 1;
7279       mptmp[cp] = PETSC_FALSE;
7280       cp++;
7281       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7282       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7283       PetscCall(MatProductSetFill(mp[cp], product->fill));
7284       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7285       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7286       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7287       mp[cp]->product->api_user = product->api_user;
7288       PetscCall(MatProductSetFromOptions(mp[cp]));
7289       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7290       rmapt[cp] = 1;
7291       cmapt[cp] = 2;
7292       cmapa[cp] = p->garray;
7293       mptmp[cp] = PETSC_FALSE;
7294       cp++;
7295     }
7296 
7297     /* A_off * P_other */
7298     if (mmdata->P_oth) {
7299       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7300       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7301       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7302       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7303       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7304       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7305       PetscCall(MatProductSetFill(mp[cp], product->fill));
7306       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7307       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7308       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7309       mp[cp]->product->api_user = product->api_user;
7310       PetscCall(MatProductSetFromOptions(mp[cp]));
7311       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7312       rmapt[cp] = 1;
7313       cmapt[cp] = 2;
7314       cmapa[cp] = P_oth_idx;
7315       mptmp[cp] = PETSC_FALSE;
7316       cp++;
7317     }
7318     break;
7319 
7320   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7321     /* A is product->B */
7322     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7323     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7324       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7325       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7326       PetscCall(MatProductSetFill(mp[cp], product->fill));
7327       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7328       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7329       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7330       mp[cp]->product->api_user = product->api_user;
7331       PetscCall(MatProductSetFromOptions(mp[cp]));
7332       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7333       PetscCall(ISGetIndices(glob, &globidx));
7334       rmapt[cp] = 2;
7335       rmapa[cp] = globidx;
7336       cmapt[cp] = 2;
7337       cmapa[cp] = globidx;
7338       mptmp[cp] = PETSC_FALSE;
7339       cp++;
7340     } else {
7341       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7342       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7343       PetscCall(MatProductSetFill(mp[cp], product->fill));
7344       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7345       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7346       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7347       mp[cp]->product->api_user = product->api_user;
7348       PetscCall(MatProductSetFromOptions(mp[cp]));
7349       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7350       PetscCall(ISGetIndices(glob, &globidx));
7351       rmapt[cp] = 1;
7352       cmapt[cp] = 2;
7353       cmapa[cp] = globidx;
7354       mptmp[cp] = PETSC_FALSE;
7355       cp++;
7356       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7357       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7358       PetscCall(MatProductSetFill(mp[cp], product->fill));
7359       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7360       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7361       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7362       mp[cp]->product->api_user = product->api_user;
7363       PetscCall(MatProductSetFromOptions(mp[cp]));
7364       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7365       rmapt[cp] = 2;
7366       rmapa[cp] = p->garray;
7367       cmapt[cp] = 2;
7368       cmapa[cp] = globidx;
7369       mptmp[cp] = PETSC_FALSE;
7370       cp++;
7371     }
7372     break;
7373   case MATPRODUCT_PtAP:
7374     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7375     /* P is product->B */
7376     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7377     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7378     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7379     PetscCall(MatProductSetFill(mp[cp], product->fill));
7380     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7381     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7382     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7383     mp[cp]->product->api_user = product->api_user;
7384     PetscCall(MatProductSetFromOptions(mp[cp]));
7385     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7386     PetscCall(ISGetIndices(glob, &globidx));
7387     rmapt[cp] = 2;
7388     rmapa[cp] = globidx;
7389     cmapt[cp] = 2;
7390     cmapa[cp] = globidx;
7391     mptmp[cp] = PETSC_FALSE;
7392     cp++;
7393     if (mmdata->P_oth) {
7394       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7395       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7396       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7397       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7398       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7399       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7400       PetscCall(MatProductSetFill(mp[cp], product->fill));
7401       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7402       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7403       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7404       mp[cp]->product->api_user = product->api_user;
7405       PetscCall(MatProductSetFromOptions(mp[cp]));
7406       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7407       mptmp[cp] = PETSC_TRUE;
7408       cp++;
7409       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7410       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7411       PetscCall(MatProductSetFill(mp[cp], product->fill));
7412       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7413       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7414       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7415       mp[cp]->product->api_user = product->api_user;
7416       PetscCall(MatProductSetFromOptions(mp[cp]));
7417       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7418       rmapt[cp] = 2;
7419       rmapa[cp] = globidx;
7420       cmapt[cp] = 2;
7421       cmapa[cp] = P_oth_idx;
7422       mptmp[cp] = PETSC_FALSE;
7423       cp++;
7424     }
7425     break;
7426   default:
7427     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7428   }
7429   /* sanity check */
7430   if (size > 1)
7431     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7432 
7433   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7434   for (i = 0; i < cp; i++) {
7435     mmdata->mp[i]    = mp[i];
7436     mmdata->mptmp[i] = mptmp[i];
7437   }
7438   mmdata->cp             = cp;
7439   C->product->data       = mmdata;
7440   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7441   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7442 
7443   /* memory type */
7444   mmdata->mtype = PETSC_MEMTYPE_HOST;
7445   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7446   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7447   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7448   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7449   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7450   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7451 
7452   /* prepare coo coordinates for values insertion */
7453 
7454   /* count total nonzeros of those intermediate seqaij Mats
7455     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7456     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7457     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7458   */
7459   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7460     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7461     if (mptmp[cp]) continue;
7462     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7463       const PetscInt *rmap = rmapa[cp];
7464       const PetscInt  mr   = mp[cp]->rmap->n;
7465       const PetscInt  rs   = C->rmap->rstart;
7466       const PetscInt  re   = C->rmap->rend;
7467       const PetscInt *ii   = mm->i;
7468       for (i = 0; i < mr; i++) {
7469         const PetscInt gr = rmap[i];
7470         const PetscInt nz = ii[i + 1] - ii[i];
7471         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7472         else ncoo_oown += nz;                  /* this row is local */
7473       }
7474     } else ncoo_d += mm->nz;
7475   }
7476 
7477   /*
7478     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7479 
7480     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7481 
7482     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7483 
7484     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7485     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7486     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7487 
7488     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7489     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7490   */
7491   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7492   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7493 
7494   /* gather (i,j) of nonzeros inserted by remote procs */
7495   if (hasoffproc) {
7496     PetscSF  msf;
7497     PetscInt ncoo2, *coo_i2, *coo_j2;
7498 
7499     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7500     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7501     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7502 
7503     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7504       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7505       PetscInt   *idxoff = mmdata->off[cp];
7506       PetscInt   *idxown = mmdata->own[cp];
7507       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7508         const PetscInt *rmap = rmapa[cp];
7509         const PetscInt *cmap = cmapa[cp];
7510         const PetscInt *ii   = mm->i;
7511         PetscInt       *coi  = coo_i + ncoo_o;
7512         PetscInt       *coj  = coo_j + ncoo_o;
7513         const PetscInt  mr   = mp[cp]->rmap->n;
7514         const PetscInt  rs   = C->rmap->rstart;
7515         const PetscInt  re   = C->rmap->rend;
7516         const PetscInt  cs   = C->cmap->rstart;
7517         for (i = 0; i < mr; i++) {
7518           const PetscInt *jj = mm->j + ii[i];
7519           const PetscInt  gr = rmap[i];
7520           const PetscInt  nz = ii[i + 1] - ii[i];
7521           if (gr < rs || gr >= re) { /* this is an offproc row */
7522             for (j = ii[i]; j < ii[i + 1]; j++) {
7523               *coi++    = gr;
7524               *idxoff++ = j;
7525             }
7526             if (!cmapt[cp]) { /* already global */
7527               for (j = 0; j < nz; j++) *coj++ = jj[j];
7528             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7529               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7530             } else { /* offdiag */
7531               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7532             }
7533             ncoo_o += nz;
7534           } else { /* this is a local row */
7535             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7536           }
7537         }
7538       }
7539       mmdata->off[cp + 1] = idxoff;
7540       mmdata->own[cp + 1] = idxown;
7541     }
7542 
7543     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7544     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7545     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7546     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7547     ncoo = ncoo_d + ncoo_oown + ncoo2;
7548     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7549     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7550     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7551     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7552     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7553     PetscCall(PetscFree2(coo_i, coo_j));
7554     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7555     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7556     coo_i = coo_i2;
7557     coo_j = coo_j2;
7558   } else { /* no offproc values insertion */
7559     ncoo = ncoo_d;
7560     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7561 
7562     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7563     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7564     PetscCall(PetscSFSetUp(mmdata->sf));
7565   }
7566   mmdata->hasoffproc = hasoffproc;
7567 
7568   /* gather (i,j) of nonzeros inserted locally */
7569   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7570     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7571     PetscInt       *coi  = coo_i + ncoo_d;
7572     PetscInt       *coj  = coo_j + ncoo_d;
7573     const PetscInt *jj   = mm->j;
7574     const PetscInt *ii   = mm->i;
7575     const PetscInt *cmap = cmapa[cp];
7576     const PetscInt *rmap = rmapa[cp];
7577     const PetscInt  mr   = mp[cp]->rmap->n;
7578     const PetscInt  rs   = C->rmap->rstart;
7579     const PetscInt  re   = C->rmap->rend;
7580     const PetscInt  cs   = C->cmap->rstart;
7581 
7582     if (mptmp[cp]) continue;
7583     if (rmapt[cp] == 1) { /* consecutive rows */
7584       /* fill coo_i */
7585       for (i = 0; i < mr; i++) {
7586         const PetscInt gr = i + rs;
7587         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7588       }
7589       /* fill coo_j */
7590       if (!cmapt[cp]) { /* type-0, already global */
7591         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7592       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7593         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7594       } else {                                            /* type-2, local to global for sparse columns */
7595         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7596       }
7597       ncoo_d += mm->nz;
7598     } else if (rmapt[cp] == 2) { /* sparse rows */
7599       for (i = 0; i < mr; i++) {
7600         const PetscInt *jj = mm->j + ii[i];
7601         const PetscInt  gr = rmap[i];
7602         const PetscInt  nz = ii[i + 1] - ii[i];
7603         if (gr >= rs && gr < re) { /* local rows */
7604           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7605           if (!cmapt[cp]) { /* type-0, already global */
7606             for (j = 0; j < nz; j++) *coj++ = jj[j];
7607           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7608             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7609           } else { /* type-2, local to global for sparse columns */
7610             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7611           }
7612           ncoo_d += nz;
7613         }
7614       }
7615     }
7616   }
7617   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7618   PetscCall(ISDestroy(&glob));
7619   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7620   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7621   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7622   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7623 
7624   /* preallocate with COO data */
7625   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7626   PetscCall(PetscFree2(coo_i, coo_j));
7627   PetscFunctionReturn(PETSC_SUCCESS);
7628 }
7629 
/* Select the symbolic matrix-product implementation for AIJ "backend" (device-capable) matrices.
   When PETSc is built with device support the backend path is chosen only if A and B have the
   same type and neither is bound to the CPU; a run-time option per product type lets the user
   force the CPU code path.  If no backend routine is selected, fall back to the plain MPIAIJ
   product setup. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* device path must be proven viable */
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE; /* no device: CPU backend path always matches */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* backend requires both operands usable on device and of the same concrete type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* option names differ depending on whether the user called the old API (MatMatMult etc.)
       or the MatProduct API */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu; /* user may have requested the CPU fallback */
  }
#endif
  if (match) {
    /* only these three product types have a backend symbolic implementation */
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7700 
7701 /*
7702    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7703 
7704    n - the number of block indices in cc[]
7705    cc - the block indices (must be large enough to contain the indices)
7706 */
7707 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7708 {
7709   PetscInt        cnt = -1, nidx, j;
7710   const PetscInt *idx;
7711 
7712   PetscFunctionBegin;
7713   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7714   if (nidx) {
7715     cnt     = 0;
7716     cc[cnt] = idx[0] / bs;
7717     for (j = 1; j < nidx; j++) {
7718       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7719     }
7720   }
7721   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7722   *n = cnt + 1;
7723   PetscFunctionReturn(PETSC_SUCCESS);
7724 }
7725 
7726 /*
7727     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7728 
7729     ncollapsed - the number of block indices
7730     collapsed - the block indices (must be large enough to contain the indices)
7731 */
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  /* cprev/ccur/merged cycle through the three caller-provided workspaces w0/w1/w2 */
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* collapse the first scalar row of the block row into cprev */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    /* collapse the next scalar row, then merge (sorted union) with the running set */
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* swap buffers: the merged result becomes "previous" for the next iteration */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  /* NOTE(review): *collapsed aliases one of the caller's workspaces w0/w1/w2 (which one
     depends on bs); the caller must not free it independently of those buffers */
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}
7749 
/*
 MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix

 Input Parameters:
+ Amat - matrix
. symmetrize - make the result symmetric
. scale - scale with diagonal
. filter - filter out small values (pass a negative value to disable filtering)
. index_size - size of index[]; 0 means use all rows/columns of each block
- index - rows/columns within each block used to compute the block's scalar value

 Output Parameter:
 . a_Gmat - output scalar graph >= 0

*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block rows */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* collapse each bs x bs block of Amat into one scalar entry of Gmat */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    /* fast path: assumes fully dense bs x bs blocks; if a sparse block is detected
       it frees its workspace and jumps to the slow old_bs path below */
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        /* MPI case: a = diagonal block, b = off-diagonal block */
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a             = d->A;
        b             = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      /* count block nonzeros per block row (nnz[] and nmax) and verify all blocks are dense:
         every scalar row of a block row must have the same, bs-aligned column pattern */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols1, *cols2;
        for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
          nnz[brow / bs] = nc2 / bs;
          if (nc2 % bs) ok = 0;
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
            if (nc1 != nc2) ok = 0;
            else {
              for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
                if (cols1[jj] != cols2[jj]) ok = 0;
                if (cols1[jj] % bs != jj % bs) ok = 0;
              }
            }
            PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
          }
          PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
            goto old_bs;
          }
        }
      }
      /* only one of these takes effect, depending on Gmat's type */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); /* one block row of values/columns at a time */
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai               = aseq->i;
        n                = ai[brow + 1] - ai[brow];
        aj               = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          if (index_size == 0) {
            for (int ii = 0; ii < bs; ii++) { // rows in block
              aa = aseq->a + ai[brow + ii] + k;
              for (int jj = 0; jj < bs; jj++) {         // columns in block
                val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
              }
            }
          } else {                                       // use (index,index) value if provided
            for (int iii = 0; iii < index_size; iii++) { // rows in block
              int ii = index[iii];
              aa     = aseq->a + ai[brow + ii] + k;
              for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
                int jj = index[jjj];
                val += PetscAbs(PetscRealPart(aa[jj]));
              }
            }
          }
          PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs; /* global block row of Gmat */
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first pass: zero the accumulators and map local off-diag columns to global block columns */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          /* second pass: accumulate the block norms */
          if (index_size == 0) {
            for (int ii = 0; ii < bs; ii++) { // rows in block
              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (int k = 0; k < ncols; k += bs) {
                for (int jj = 0; jj < bs; jj++) { // cols in block
                  PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          } else {                                       // use (index,index) value if provided
            for (int iii = 0; iii < index_size; iii++) { // rows in block
              int ii = index[iii];
              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (int k = 0; k < ncols; k += bs) {
                for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
                  int jj = index[jjj];
                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      /* slow path (also the goto target from above): handles sparse blocks by collapsing
         row-by-row with MatCollapseRows() and accumulating scalar |values| with ADD_VALUES */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
       */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
         */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          /* clamp to the number of off-process block columns */
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: the graph is Amat itself -- copy only if it will be modified below */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a             = d->A;
      b             = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    /* Gmat += Gmat^T unless Amat is already known symmetric */
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
8033 
8034 /*
8035     Special version for direct calls from Fortran
8036 */
8037 #include <petsc/private/fortranimpl.h>
8038 
/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

/* Like the PetscCall() redefinition above: report the error through *_ierr and
   return from the void Fortran stub instead of returning an error code */
#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the stub's C symbol to the Fortran compiler's name-mangling convention */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/* matsetvaluesmpiaij_ - Fortran-callable stub that inserts/adds values into an MPIAIJ matrix.
   All arguments arrive by reference (Fortran convention); errors are reported through *_ierr
   via the redefined PetscCall()/SETERRQ() above since this routine returns void.
   Owned entries go directly into the diagonal (A) or off-diagonal (B) SeqAIJ part through the
   MatSetValues_SeqAIJ_{A,B}_Private() macros; entries for rows owned by other ranks are stashed. */
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row index means skip */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up search state for both the A and B parts of this row */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the owned (diagonal) block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* off-diagonal column: translate global column to B's local column via colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              /* column not present in B's pattern and new nonzeros allowed: disassemble and retry globally */
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another rank: stash the values for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8177 
8178 /* Undefining these here since they were redefined from their original definition above! No
8179  * other PETSc functions should be defined past this point, as it is impossible to recover the
8180  * original definitions */
8181 #undef PetscCall
8182 #undef SETERRQ
8183