xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 623b4cf32a4d9ee5e9da0c76cfd89db17cbd9c1f)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* Pull in the shared hash-based assembly code for this matrix type; the include
      defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash().
      TYPE and TYPE_AIJ exist only for the duration of the include and are removed right after it
      (how the header consumes them is not visible here -- see mpihashmat.h). */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
115    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
124   Level: beginner
125 
126   Developer Note:
127   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
128   enough exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166 
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
171 {
172   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
173 
174   PetscFunctionBegin;
175   if (mat->A) {
176     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
177     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
178   }
179   PetscFunctionReturn(PETSC_SUCCESS);
180 }
181 
182 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
183 {
184   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
185   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
186   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
187   const PetscInt  *ia, *ib;
188   const MatScalar *aa, *bb, *aav, *bav;
189   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
190   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
191 
192   PetscFunctionBegin;
193   *keptrows = NULL;
194 
195   ia = a->i;
196   ib = b->i;
197   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
198   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
199   for (i = 0; i < m; i++) {
200     na = ia[i + 1] - ia[i];
201     nb = ib[i + 1] - ib[i];
202     if (!na && !nb) {
203       cnt++;
204       goto ok1;
205     }
206     aa = aav + ia[i];
207     for (j = 0; j < na; j++) {
208       if (aa[j] != 0.0) goto ok1;
209     }
210     bb = PetscSafePointerPlusOffset(bav, ib[i]);
211     for (j = 0; j < nb; j++) {
212       if (bb[j] != 0.0) goto ok1;
213     }
214     cnt++;
215   ok1:;
216   }
217   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
218   if (!n0rows) {
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
220     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
221     PetscFunctionReturn(PETSC_SUCCESS);
222   }
223   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
224   cnt = 0;
225   for (i = 0; i < m; i++) {
226     na = ia[i + 1] - ia[i];
227     nb = ib[i + 1] - ib[i];
228     if (!na && !nb) continue;
229     aa = aav + ia[i];
230     for (j = 0; j < na; j++) {
231       if (aa[j] != 0.0) {
232         rows[cnt++] = rstart + i;
233         goto ok2;
234       }
235     }
236     bb = PetscSafePointerPlusOffset(bav, ib[i]);
237     for (j = 0; j < nb; j++) {
238       if (bb[j] != 0.0) {
239         rows[cnt++] = rstart + i;
240         goto ok2;
241       }
242     }
243   ok2:;
244   }
245   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
247   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
252 {
253   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
254   PetscBool   cong;
255 
256   PetscFunctionBegin;
257   PetscCall(MatHasCongruentLayouts(Y, &cong));
258   if (Y->assembled && cong) {
259     PetscCall(MatDiagonalSet(aij->A, D, is));
260   } else {
261     PetscCall(MatDiagonalSet_Default(Y, D, is));
262   }
263   PetscFunctionReturn(PETSC_SUCCESS);
264 }
265 
266 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
267 {
268   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
269   PetscInt    i, rstart, nrows, *rows;
270 
271   PetscFunctionBegin;
272   *zrows = NULL;
273   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
274   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
275   for (i = 0; i < nrows; i++) rows[i] += rstart;
276   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
280 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
281 {
282   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
283   PetscInt           i, m, n, *garray = aij->garray;
284   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
285   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
286   PetscReal         *work;
287   const PetscScalar *dummy;
288 
289   PetscFunctionBegin;
290   PetscCall(MatGetSize(A, &m, &n));
291   PetscCall(PetscCalloc1(n, &work));
292   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
294   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
295   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
296   if (type == NORM_2) {
297     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
298     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
299   } else if (type == NORM_1) {
300     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
301     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
302   } else if (type == NORM_INFINITY) {
303     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
304     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
305   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
306     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
307     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
308   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
309     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
310     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
311   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
312   if (type == NORM_INFINITY) {
313     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
314   } else {
315     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
316   }
317   PetscCall(PetscFree(work));
318   if (type == NORM_2) {
319     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
320   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
321     for (i = 0; i < n; i++) reductions[i] /= m;
322   }
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
327 {
328   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
329   IS              sis, gis;
330   const PetscInt *isis, *igis;
331   PetscInt        n, *iis, nsis, ngis, rstart, i;
332 
333   PetscFunctionBegin;
334   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
335   PetscCall(MatFindNonzeroRows(a->B, &gis));
336   PetscCall(ISGetSize(gis, &ngis));
337   PetscCall(ISGetSize(sis, &nsis));
338   PetscCall(ISGetIndices(sis, &isis));
339   PetscCall(ISGetIndices(gis, &igis));
340 
341   PetscCall(PetscMalloc1(ngis + nsis, &iis));
342   PetscCall(PetscArraycpy(iis, igis, ngis));
343   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
344   n = ngis + nsis;
345   PetscCall(PetscSortRemoveDupsInt(&n, iis));
346   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
347   for (i = 0; i < n; i++) iis[i] += rstart;
348   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
349 
350   PetscCall(ISRestoreIndices(sis, &isis));
351   PetscCall(ISRestoreIndices(gis, &igis));
352   PetscCall(ISDestroy(&sis));
353   PetscCall(ISDestroy(&gis));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
357 /*
358   Local utility routine that creates a mapping from the global column
359 number to the local number in the off-diagonal part of the local
360 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
361 a slightly higher hash table cost; without it it is not scalable (each processor
362 has an order N integer array but is fast to access.
363 */
364 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
365 {
366   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
367   PetscInt    n   = aij->B->cmap->n, i;
368 
369   PetscFunctionBegin;
370   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
371 #if defined(PETSC_USE_CTABLE)
372   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
373   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
374 #else
375   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
376   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
377 #endif
378   PetscFunctionReturn(PETSC_SUCCESS);
379 }
380 
/*
  MatSetValues_SeqAIJ_A_Private - Sets or adds one value in one row of the diagonal block.

  Arguments:
    row, col   - row and column as used to index the block's row arrays
    value      - the value to store or add
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - row/column numbers used only in the error message

  The macro expands inside a function and relies on these names from the enclosing scope:
  A, a, am, aa, ai, aj, aimax, ailen, rp1, ap1, nrow1, rmax1, low1, high1, lastcol1, t, _i, N,
  ignorezeroentries and nonew.  The caller must have pointed rp1/ap1 at the column/value slice
  of `row` and set nrow1, rmax1, low1, high1 and lastcol1 for that row.

  Steps: narrow [low1, high1) by bisection until it spans at most five entries, scan it
  linearly, and update the entry if the column already exists.  Otherwise the insertion is
  skipped when the value is 0.0 with ignorezeroentries set and row != col, or when nonew == 1;
  nonew == -1 raises an error.  In the remaining cases MatSeqXAIJReallocateAIJ() is invoked,
  the later entries of the row are shifted up by one and the new entry is stored at _i.
  ailen[row] is written back on every path.
*/
381 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
382   do { \
    /* keep whichever end of the previous search window is still valid for this column */ \
383     if (col <= lastcol1) low1 = 0; \
384     else high1 = nrow1; \
385     lastcol1 = col; \
386     while (high1 - low1 > 5) { \
387       t = (low1 + high1) / 2; \
388       if (rp1[t] > col) high1 = t; \
389       else low1 = t; \
390     } \
391     for (_i = low1; _i < high1; _i++) { \
392       if (rp1[_i] > col) break; \
393       if (rp1[_i] == col) { \
394         if (addv == ADD_VALUES) { \
395           ap1[_i] += value; \
396           /* Not sure whether LogFlops will slow down the code or not */ \
397           (void)PetscLogFlops(1.0); \
398         } else ap1[_i] = value; \
399         goto a_noinsert; \
400       } \
401     } \
    /* column not present: decide whether a new entry may be created (row != col exempts the case row == col from the zero-skip) */ \
402     if (value == 0.0 && ignorezeroentries && row != col) { \
403       low1  = 0; \
404       high1 = nrow1; \
405       goto a_noinsert; \
406     } \
407     if (nonew == 1) { \
408       low1  = 0; \
409       high1 = nrow1; \
410       goto a_noinsert; \
411     } \
412     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
413     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    /* N is the index of the last entry before the insertion; nrow1 now counts the new entry */ \
414     N = nrow1++ - 1; \
415     a->nz++; \
416     high1++; \
417     /* shift up all the later entries in this row */ \
418     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
419     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
420     rp1[_i] = col; \
421     ap1[_i] = value; \
422     A->nonzerostate++; \
423   a_noinsert:; \
424     ailen[row] = nrow1; \
425   } while (0)
426 
/*
  MatSetValues_SeqAIJ_B_Private - Sets or adds one value in one row of the off-diagonal block.

  Arguments:
    row, col   - row and column as used to index the block's row arrays
    value      - the value to store or add
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - row/column numbers used only in the error message

  The macro expands inside a function and relies on these names from the enclosing scope:
  B, b, bm, ba, bi, bj, bimax, bilen, rp2, ap2, nrow2, rmax2, low2, high2, lastcol2, t, _i, N,
  ignorezeroentries and nonew.  The caller must have pointed rp2/ap2 at the column/value slice
  of `row` and set nrow2, rmax2, low2, high2 and lastcol2 for that row.

  Steps: narrow [low2, high2) by bisection until it spans at most five entries, scan it
  linearly, and update the entry if the column already exists.  Otherwise the insertion is
  skipped when the value is 0.0 with ignorezeroentries set (no row == col exemption here), or
  when nonew == 1; nonew == -1 raises an error.  In the remaining cases
  MatSeqXAIJReallocateAIJ() is invoked, the later entries of the row are shifted up by one and
  the new entry is stored at _i.  bilen[row] is written back on every path.
*/
427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
428   do { \
    /* keep whichever end of the previous search window is still valid for this column */ \
429     if (col <= lastcol2) low2 = 0; \
430     else high2 = nrow2; \
431     lastcol2 = col; \
432     while (high2 - low2 > 5) { \
433       t = (low2 + high2) / 2; \
434       if (rp2[t] > col) high2 = t; \
435       else low2 = t; \
436     } \
437     for (_i = low2; _i < high2; _i++) { \
438       if (rp2[_i] > col) break; \
439       if (rp2[_i] == col) { \
440         if (addv == ADD_VALUES) { \
441           ap2[_i] += value; \
442           (void)PetscLogFlops(1.0); \
443         } else ap2[_i] = value; \
444         goto b_noinsert; \
445       } \
446     } \
    /* column not present: decide whether a new entry may be created */ \
447     if (value == 0.0 && ignorezeroentries) { \
448       low2  = 0; \
449       high2 = nrow2; \
450       goto b_noinsert; \
451     } \
452     if (nonew == 1) { \
453       low2  = 0; \
454       high2 = nrow2; \
455       goto b_noinsert; \
456     } \
457     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
458     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    /* N is the index of the last entry before the insertion; nrow2 now counts the new entry */ \
459     N = nrow2++ - 1; \
460     b->nz++; \
461     high2++; \
462     /* shift up all the later entries in this row */ \
463     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
464     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
465     rp2[_i] = col; \
466     ap2[_i] = value; \
467     B->nonzerostate++; \
468   b_noinsert:; \
469     bilen[row] = nrow2; \
470   } while (0)
471 
472 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
473 {
474   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
475   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
476   PetscInt     l, *garray                         = mat->garray, diag;
477   PetscScalar *aa, *ba;
478 
479   PetscFunctionBegin;
480   /* code only works for square matrices A */
481 
482   /* find size of row to the left of the diagonal part */
483   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
484   row = row - diag;
485   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
486     if (garray[b->j[b->i[row] + l]] > diag) break;
487   }
488   if (l) {
489     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
490     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
491     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
492   }
493 
494   /* diagonal part */
495   if (a->i[row + 1] - a->i[row]) {
496     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
497     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
498     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
499   }
500 
501   /* right of diagonal part */
502   if (b->i[row + 1] - b->i[row] - l) {
503     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
504     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
505     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
506   }
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
/*
  MatSetValues_MPIAIJ - Inserts or adds a logically dense m-by-n block of values.

  Input Parameters:
+ mat  - the MPIAIJ matrix
. m    - number of rows in the block
. im   - global row indices; negative entries are skipped
. n    - number of columns in the block
. in   - global column indices; negative entries are skipped
. v    - the values, read as v[i * n + j] when aij->roworiented is set and as v[i + j * m]
         otherwise; when v is NULL the value 0.0 is used for every entry
- addv - ADD_VALUES adds to existing entries, anything else overwrites them

  Rows owned by this process are written directly: columns inside the local column range go
  to the diagonal block through MatSetValues_SeqAIJ_A_Private(), all other columns go to the
  off-diagonal block through MatSetValues_SeqAIJ_B_Private().  If the matrix was assembled
  before, the global column is first translated through aij->colmap; a column that is missing
  there triggers MatDisAssemble_MPIAIJ() when the off-diagonal block permits new nonzeros
  (nonew == 0), otherwise it is reported through PetscInfo() (nonew == 1) or raises an error.

  Rows owned by other processes raise an error when mat->nooffprocentries is set; otherwise
  they are put into mat->stash (unless aij->donotstash is set) and mat->assembled is cleared.

  Row or column indices that are too large raise PETSC_ERR_ARG_OUTOFRANGE.
*/
510 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
511 {
512   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
513   PetscScalar value = 0.0;
514   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
515   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
516   PetscBool   roworiented = aij->roworiented;
517 
518   /* Variables below are referenced by name inside the MatSetValues_SeqAIJ_{A,B}_Private() macros; do not rename them.
        `ii` is not referenced directly in this function -- presumably it is consumed by MatSeqXAIJReallocateAIJ(); confirm before removing. */
519   Mat         A     = aij->A;
520   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
521   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
522   PetscBool   ignorezeroentries = a->ignorezeroentries;
523   Mat         B                 = aij->B;
524   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
525   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
526   MatScalar  *aa, *ba;
527   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
528   PetscInt    nonew;
529   MatScalar  *ap1, *ap2;
530 
531   PetscFunctionBegin;
532   PetscCall(MatSeqAIJGetArray(A, &aa));
533   PetscCall(MatSeqAIJGetArray(B, &ba));
534   for (i = 0; i < m; i++) {
535     if (im[i] < 0) continue;
536     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
537     if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up the per-row search state both macros expect */
538       row      = im[i] - rstart;
539       lastcol1 = -1;
540       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
541       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
542       rmax1    = aimax[row];
543       nrow1    = ailen[row];
544       low1     = 0;
545       high1    = nrow1;
546       lastcol2 = -1;
547       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
548       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
549       rmax2    = bimax[row];
550       nrow2    = bilen[row];
551       low2     = 0;
552       high2    = nrow2;
553 
554       for (j = 0; j < n; j++) {
555         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
          /* adding an off-diagonal zero is a no-op when zero entries are ignored */
556         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
557         if (in[j] >= cstart && in[j] < cend) {
            /* column inside the local column range: diagonal block, local column number */
558           col   = in[j] - cstart;
559           nonew = a->nonew;
560           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
561         } else if (in[j] < 0) {
562           continue;
563         } else {
564           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
565           if (mat->was_assembled) {
              /* after a previous assembly the off-diagonal block is addressed through colmap, which stores (local column + 1) */
566             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
567 #if defined(PETSC_USE_CTABLE)
568             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
569             col--;
570 #else
571             col = aij->colmap[in[j]] - 1;
572 #endif
573             if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
574               PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
575               col = in[j];
576               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
577               B     = aij->B;
578               b     = (Mat_SeqAIJ *)B->data;
579               bimax = b->imax;
580               bi    = b->i;
581               bilen = b->ilen;
582               bj    = b->j;
583               ba    = b->a;
584               rp2   = bj + bi[row];
585               ap2   = ba + bi[row];
586               rmax2 = bimax[row];
587               nrow2 = bilen[row];
588               low2  = 0;
589               high2 = nrow2;
590               bm    = aij->B->rmap->n;
                /* same assignment as a few lines above */
591               ba    = b->a;
592             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
593               if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
594                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
595               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
596             }
597           } else col = in[j];
598           nonew = b->nonew;
599           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
600         }
601       }
602     } else {
        /* row owned by another process: stash it (unless disabled) */
603       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
604       if (!aij->donotstash) {
605         mat->assembled = PETSC_FALSE;
606         if (roworiented) {
607           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
608         } else {
609           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
610         }
611       }
612     }
613   }
614   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
615   PetscCall(MatSeqAIJRestoreArray(B, &ba));
616   PetscFunctionReturn(PETSC_SUCCESS);
617 }
618 
619 /*
620     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
621     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
622     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
623 */
624 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
625 {
626   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
627   Mat         A      = aij->A; /* diagonal part of the matrix */
628   Mat         B      = aij->B; /* off-diagonal part of the matrix */
629   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
630   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
631   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
632   PetscInt   *ailen = a->ilen, *aj = a->j;
633   PetscInt   *bilen = b->ilen, *bj = b->j;
634   PetscInt    am          = aij->A->rmap->n, j;
635   PetscInt    diag_so_far = 0, dnz;
636   PetscInt    offd_so_far = 0, onz;
637 
638   PetscFunctionBegin;
639   /* Iterate over all rows of the matrix */
640   for (j = 0; j < am; j++) {
641     dnz = onz = 0;
642     /*  Iterate over all non-zero columns of the current row */
643     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
644       /* If column is in the diagonal */
645       if (mat_j[col] >= cstart && mat_j[col] < cend) {
646         aj[diag_so_far++] = mat_j[col] - cstart;
647         dnz++;
648       } else { /* off-diagonal entries */
649         bj[offd_so_far++] = mat_j[col];
650         onz++;
651       }
652     }
653     ailen[j] = dnz;
654     bilen[j] = onz;
655   }
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
659 /*
660     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
661     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
662     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
663     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
664     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
665 */
666 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
667 {
668   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
669   Mat          A    = aij->A; /* diagonal part of the matrix */
670   Mat          B    = aij->B; /* off-diagonal part of the matrix */
671   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
672   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
673   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
674   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
675   PetscInt    *ailen = a->ilen, *aj = a->j;
676   PetscInt    *bilen = b->ilen, *bj = b->j;
677   PetscInt     am          = aij->A->rmap->n, j;
678   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
679   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
680   PetscScalar *aa = a->a, *ba = b->a;
681 
682   PetscFunctionBegin;
683   /* Iterate over all rows of the matrix */
684   for (j = 0; j < am; j++) {
685     dnz_row = onz_row = 0;
686     rowstart_offd     = full_offd_i[j];
687     rowstart_diag     = full_diag_i[j];
688     /*  Iterate over all non-zero columns of the current row */
689     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
690       /* If column is in the diagonal */
691       if (mat_j[col] >= cstart && mat_j[col] < cend) {
692         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
693         aa[rowstart_diag + dnz_row] = mat_a[col];
694         dnz_row++;
695       } else { /* off-diagonal entries */
696         bj[rowstart_offd + onz_row] = mat_j[col];
697         ba[rowstart_offd + onz_row] = mat_a[col];
698         onz_row++;
699       }
700     }
701     ailen[j] = dnz_row;
702     bilen[j] = onz_row;
703   }
704   PetscFunctionReturn(PETSC_SUCCESS);
705 }
706 
707 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
708 {
709   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
710   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
711   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
712 
713   PetscFunctionBegin;
714   for (i = 0; i < m; i++) {
715     if (idxm[i] < 0) continue; /* negative row */
716     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
717     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
718     row = idxm[i] - rstart;
719     for (j = 0; j < n; j++) {
720       if (idxn[j] < 0) continue; /* negative column */
721       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
722       if (idxn[j] >= cstart && idxn[j] < cend) {
723         col = idxn[j] - cstart;
724         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
725       } else {
726         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
727 #if defined(PETSC_USE_CTABLE)
728         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
729         col--;
730 #else
731         col = aij->colmap[idxn[j]] - 1;
732 #endif
733         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
734         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
735       }
736     }
737   }
738   PetscFunctionReturn(PETSC_SUCCESS);
739 }
740 
741 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
742 {
743   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
744   PetscInt    nstash, reallocs;
745 
746   PetscFunctionBegin;
747   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
748 
749   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
750   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
751   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
752   PetscFunctionReturn(PETSC_SUCCESS);
753 }
754 
755 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
756 {
757   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
758   PetscMPIInt  n;
759   PetscInt     i, j, rstart, ncols, flg;
760   PetscInt    *row, *col;
761   PetscBool    other_disassembled;
762   PetscScalar *val;
763 
764   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
765 
766   PetscFunctionBegin;
767   if (!aij->donotstash && !mat->nooffprocentries) {
768     while (1) {
769       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
770       if (!flg) break;
771 
772       for (i = 0; i < n;) {
773         /* Now identify the consecutive vals belonging to the same row */
774         for (j = i, rstart = row[j]; j < n; j++) {
775           if (row[j] != rstart) break;
776         }
777         if (j < n) ncols = j - i;
778         else ncols = n - i;
779         /* Now assemble all these values with a single function call */
780         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
781         i = j;
782       }
783     }
784     PetscCall(MatStashScatterEnd_Private(&mat->stash));
785   }
786 #if defined(PETSC_HAVE_DEVICE)
787   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
788   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
789   if (mat->boundtocpu) {
790     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
791     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
792   }
793 #endif
794   PetscCall(MatAssemblyBegin(aij->A, mode));
795   PetscCall(MatAssemblyEnd(aij->A, mode));
796 
797   /* determine if any processor has disassembled, if so we must
798      also disassemble ourself, in order that we may reassemble. */
799   /*
800      if nonzero structure of submatrix B cannot change then we know that
801      no processor disassembled thus we can skip this stuff
802   */
803   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
804     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
805     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
806       PetscCall(MatDisAssemble_MPIAIJ(mat));
807     }
808   }
809   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
810   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
811 #if defined(PETSC_HAVE_DEVICE)
812   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
813 #endif
814   PetscCall(MatAssemblyBegin(aij->B, mode));
815   PetscCall(MatAssemblyEnd(aij->B, mode));
816 
817   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
818 
819   aij->rowvalues = NULL;
820 
821   PetscCall(VecDestroy(&aij->diag));
822 
823   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
824   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
825     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
826     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
827   }
828 #if defined(PETSC_HAVE_DEVICE)
829   mat->offloadmask = PETSC_OFFLOAD_BOTH;
830 #endif
831   PetscFunctionReturn(PETSC_SUCCESS);
832 }
833 
834 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
835 {
836   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
837 
838   PetscFunctionBegin;
839   PetscCall(MatZeroEntries(l->A));
840   PetscCall(MatZeroEntries(l->B));
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
845 {
846   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
847   PetscObjectState sA, sB;
848   PetscInt        *lrows;
849   PetscInt         r, len;
850   PetscBool        cong, lch, gch;
851 
852   PetscFunctionBegin;
853   /* get locally owned rows */
854   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
855   PetscCall(MatHasCongruentLayouts(A, &cong));
856   /* fix right hand side if needed */
857   if (x && b) {
858     const PetscScalar *xx;
859     PetscScalar       *bb;
860 
861     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
862     PetscCall(VecGetArrayRead(x, &xx));
863     PetscCall(VecGetArray(b, &bb));
864     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
865     PetscCall(VecRestoreArrayRead(x, &xx));
866     PetscCall(VecRestoreArray(b, &bb));
867   }
868 
869   sA = mat->A->nonzerostate;
870   sB = mat->B->nonzerostate;
871 
872   if (diag != 0.0 && cong) {
873     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
874     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
875   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
876     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
877     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
878     PetscInt    nnwA, nnwB;
879     PetscBool   nnzA, nnzB;
880 
881     nnwA = aijA->nonew;
882     nnwB = aijB->nonew;
883     nnzA = aijA->keepnonzeropattern;
884     nnzB = aijB->keepnonzeropattern;
885     if (!nnzA) {
886       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
887       aijA->nonew = 0;
888     }
889     if (!nnzB) {
890       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
891       aijB->nonew = 0;
892     }
893     /* Must zero here before the next loop */
894     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
895     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
896     for (r = 0; r < len; ++r) {
897       const PetscInt row = lrows[r] + A->rmap->rstart;
898       if (row >= A->cmap->N) continue;
899       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
900     }
901     aijA->nonew = nnwA;
902     aijB->nonew = nnwB;
903   } else {
904     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
905     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
906   }
907   PetscCall(PetscFree(lrows));
908   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
909   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
910 
911   /* reduce nonzerostate */
912   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
913   PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
914   if (gch) A->nonzerostate++;
915   PetscFunctionReturn(PETSC_SUCCESS);
916 }
917 
918 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
919 {
920   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
921   PetscMPIInt        n = A->rmap->n;
922   PetscInt           i, j, r, m, len = 0;
923   PetscInt          *lrows, *owners = A->rmap->range;
924   PetscMPIInt        p = 0;
925   PetscSFNode       *rrows;
926   PetscSF            sf;
927   const PetscScalar *xx;
928   PetscScalar       *bb, *mask, *aij_a;
929   Vec                xmask, lmask;
930   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
931   const PetscInt    *aj, *ii, *ridx;
932   PetscScalar       *aa;
933 
934   PetscFunctionBegin;
935   /* Create SF where leaves are input rows and roots are owned rows */
936   PetscCall(PetscMalloc1(n, &lrows));
937   for (r = 0; r < n; ++r) lrows[r] = -1;
938   PetscCall(PetscMalloc1(N, &rrows));
939   for (r = 0; r < N; ++r) {
940     const PetscInt idx = rows[r];
941     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
942     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
943       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
944     }
945     rrows[r].rank  = p;
946     rrows[r].index = rows[r] - owners[p];
947   }
948   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
949   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
950   /* Collect flags for rows to be zeroed */
951   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
952   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
953   PetscCall(PetscSFDestroy(&sf));
954   /* Compress and put in row numbers */
955   for (r = 0; r < n; ++r)
956     if (lrows[r] >= 0) lrows[len++] = r;
957   /* zero diagonal part of matrix */
958   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
959   /* handle off-diagonal part of matrix */
960   PetscCall(MatCreateVecs(A, &xmask, NULL));
961   PetscCall(VecDuplicate(l->lvec, &lmask));
962   PetscCall(VecGetArray(xmask, &bb));
963   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
964   PetscCall(VecRestoreArray(xmask, &bb));
965   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
966   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
967   PetscCall(VecDestroy(&xmask));
968   if (x && b) { /* this code is buggy when the row and column layout don't match */
969     PetscBool cong;
970 
971     PetscCall(MatHasCongruentLayouts(A, &cong));
972     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
973     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
974     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
975     PetscCall(VecGetArrayRead(l->lvec, &xx));
976     PetscCall(VecGetArray(b, &bb));
977   }
978   PetscCall(VecGetArray(lmask, &mask));
979   /* remove zeroed rows of off-diagonal matrix */
980   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
981   ii = aij->i;
982   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
983   /* loop over all elements of off process part of matrix zeroing removed columns*/
984   if (aij->compressedrow.use) {
985     m    = aij->compressedrow.nrows;
986     ii   = aij->compressedrow.i;
987     ridx = aij->compressedrow.rindex;
988     for (i = 0; i < m; i++) {
989       n  = ii[i + 1] - ii[i];
990       aj = aij->j + ii[i];
991       aa = aij_a + ii[i];
992 
993       for (j = 0; j < n; j++) {
994         if (PetscAbsScalar(mask[*aj])) {
995           if (b) bb[*ridx] -= *aa * xx[*aj];
996           *aa = 0.0;
997         }
998         aa++;
999         aj++;
1000       }
1001       ridx++;
1002     }
1003   } else { /* do not use compressed row format */
1004     m = l->B->rmap->n;
1005     for (i = 0; i < m; i++) {
1006       n  = ii[i + 1] - ii[i];
1007       aj = aij->j + ii[i];
1008       aa = aij_a + ii[i];
1009       for (j = 0; j < n; j++) {
1010         if (PetscAbsScalar(mask[*aj])) {
1011           if (b) bb[i] -= *aa * xx[*aj];
1012           *aa = 0.0;
1013         }
1014         aa++;
1015         aj++;
1016       }
1017     }
1018   }
1019   if (x && b) {
1020     PetscCall(VecRestoreArray(b, &bb));
1021     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1022   }
1023   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1024   PetscCall(VecRestoreArray(lmask, &mask));
1025   PetscCall(VecDestroy(&lmask));
1026   PetscCall(PetscFree(lrows));
1027 
1028   /* only change matrix nonzero state if pattern was allowed to be changed */
1029   if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
1030     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1031     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1032   }
1033   PetscFunctionReturn(PETSC_SUCCESS);
1034 }
1035 
1036 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1037 {
1038   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1039   PetscInt    nt;
1040   VecScatter  Mvctx = a->Mvctx;
1041 
1042   PetscFunctionBegin;
1043   PetscCall(VecGetLocalSize(xx, &nt));
1044   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1045   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1046   PetscUseTypeMethod(a->A, mult, xx, yy);
1047   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1048   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1049   PetscFunctionReturn(PETSC_SUCCESS);
1050 }
1051 
1052 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1055 
1056   PetscFunctionBegin;
1057   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1058   PetscFunctionReturn(PETSC_SUCCESS);
1059 }
1060 
1061 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1062 {
1063   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1064   VecScatter  Mvctx = a->Mvctx;
1065 
1066   PetscFunctionBegin;
1067   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1068   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1069   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1070   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1071   PetscFunctionReturn(PETSC_SUCCESS);
1072 }
1073 
1074 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1075 {
1076   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1077 
1078   PetscFunctionBegin;
1079   /* do nondiagonal part */
1080   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1081   /* do local part */
1082   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1083   /* add partial results together */
1084   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1085   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
1089 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1090 {
1091   MPI_Comm    comm;
1092   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1093   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1094   IS          Me, Notme;
1095   PetscInt    M, N, first, last, *notme, i;
1096   PetscBool   lf;
1097   PetscMPIInt size;
1098 
1099   PetscFunctionBegin;
1100   /* Easy test: symmetric diagonal block */
1101   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1102   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1103   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1104   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1105   PetscCallMPI(MPI_Comm_size(comm, &size));
1106   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1107 
1108   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1109   PetscCall(MatGetSize(Amat, &M, &N));
1110   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1111   PetscCall(PetscMalloc1(N - last + first, &notme));
1112   for (i = 0; i < first; i++) notme[i] = i;
1113   for (i = last; i < M; i++) notme[i - last + first] = i;
1114   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1115   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1116   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1117   Aoff = Aoffs[0];
1118   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1119   Boff = Boffs[0];
1120   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1121   PetscCall(MatDestroyMatrices(1, &Aoffs));
1122   PetscCall(MatDestroyMatrices(1, &Boffs));
1123   PetscCall(ISDestroy(&Me));
1124   PetscCall(ISDestroy(&Notme));
1125   PetscCall(PetscFree(notme));
1126   PetscFunctionReturn(PETSC_SUCCESS);
1127 }
1128 
1129 static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1130 {
1131   PetscFunctionBegin;
1132   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1133   PetscFunctionReturn(PETSC_SUCCESS);
1134 }
1135 
1136 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1137 {
1138   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1139 
1140   PetscFunctionBegin;
1141   /* do nondiagonal part */
1142   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1143   /* do local part */
1144   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1145   /* add partial results together */
1146   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1147   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1148   PetscFunctionReturn(PETSC_SUCCESS);
1149 }
1150 
1151 /*
1152   This only works correctly for square matrices where the subblock A->A is the
1153    diagonal block
1154 */
1155 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1161   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1162   PetscCall(MatGetDiagonal(a->A, v));
1163   PetscFunctionReturn(PETSC_SUCCESS);
1164 }
1165 
1166 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1167 {
1168   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1169 
1170   PetscFunctionBegin;
1171   PetscCall(MatScale(a->A, aa));
1172   PetscCall(MatScale(a->B, aa));
1173   PetscFunctionReturn(PETSC_SUCCESS);
1174 }
1175 
1176 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1177 {
1178   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1179   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1180   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1181   const PetscInt    *garray = aij->garray;
1182   const PetscScalar *aa, *ba;
1183   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1184   PetscInt64         nz, hnz;
1185   PetscInt          *rowlens;
1186   PetscInt          *colidxs;
1187   PetscScalar       *matvals;
1188   PetscMPIInt        rank;
1189 
1190   PetscFunctionBegin;
1191   PetscCall(PetscViewerSetUp(viewer));
1192 
1193   M  = mat->rmap->N;
1194   N  = mat->cmap->N;
1195   m  = mat->rmap->n;
1196   rs = mat->rmap->rstart;
1197   cs = mat->cmap->rstart;
1198   nz = A->nz + B->nz;
1199 
1200   /* write matrix header */
1201   header[0] = MAT_FILE_CLASSID;
1202   header[1] = M;
1203   header[2] = N;
1204   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1205   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1206   if (rank == 0) {
1207     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1208     else header[3] = (PetscInt)hnz;
1209   }
1210   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1211 
1212   /* fill in and store row lengths  */
1213   PetscCall(PetscMalloc1(m, &rowlens));
1214   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1215   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1216   PetscCall(PetscFree(rowlens));
1217 
1218   /* fill in and store column indices */
1219   PetscCall(PetscMalloc1(nz, &colidxs));
1220   for (cnt = 0, i = 0; i < m; i++) {
1221     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1222       if (garray[B->j[jb]] > cs) break;
1223       colidxs[cnt++] = garray[B->j[jb]];
1224     }
1225     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1226     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1227   }
1228   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1229   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1230   PetscCall(PetscFree(colidxs));
1231 
1232   /* fill in and store nonzero values */
1233   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1235   PetscCall(PetscMalloc1(nz, &matvals));
1236   for (cnt = 0, i = 0; i < m; i++) {
1237     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1238       if (garray[B->j[jb]] > cs) break;
1239       matvals[cnt++] = ba[jb];
1240     }
1241     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1242     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1243   }
1244   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1245   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1246   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1247   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1248   PetscCall(PetscFree(matvals));
1249 
1250   /* write block size option to the viewer's .info file */
1251   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1252   PetscFunctionReturn(PETSC_SUCCESS);
1253 }
1254 
1255 #include <petscdraw.h>
1256 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1257 {
1258   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1259   PetscMPIInt       rank = aij->rank, size = aij->size;
1260   PetscBool         isdraw, iascii, isbinary;
1261   PetscViewer       sviewer;
1262   PetscViewerFormat format;
1263 
1264   PetscFunctionBegin;
1265   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1266   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1267   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1268   if (iascii) {
1269     PetscCall(PetscViewerGetFormat(viewer, &format));
1270     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1271       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
1272       PetscCall(PetscMalloc1(size, &nz));
1273       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1274       for (i = 0; i < (PetscInt)size; i++) {
1275         nmax = PetscMax(nmax, nz[i]);
1276         nmin = PetscMin(nmin, nz[i]);
1277         navg += nz[i];
1278       }
1279       PetscCall(PetscFree(nz));
1280       navg = navg / size;
1281       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1282       PetscFunctionReturn(PETSC_SUCCESS);
1283     }
1284     PetscCall(PetscViewerGetFormat(viewer, &format));
1285     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1286       MatInfo   info;
1287       PetscInt *inodes = NULL;
1288 
1289       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1290       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1291       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1292       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1293       if (!inodes) {
1294         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1295                                                      (double)info.memory));
1296       } else {
1297         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1298                                                      (double)info.memory));
1299       }
1300       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1301       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1302       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1303       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1304       PetscCall(PetscViewerFlush(viewer));
1305       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1306       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1307       PetscCall(VecScatterView(aij->Mvctx, viewer));
1308       PetscFunctionReturn(PETSC_SUCCESS);
1309     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1310       PetscInt inodecount, inodelimit, *inodes;
1311       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1312       if (inodes) {
1313         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1314       } else {
1315         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1316       }
1317       PetscFunctionReturn(PETSC_SUCCESS);
1318     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1319       PetscFunctionReturn(PETSC_SUCCESS);
1320     }
1321   } else if (isbinary) {
1322     if (size == 1) {
1323       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1324       PetscCall(MatView(aij->A, viewer));
1325     } else {
1326       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1327     }
1328     PetscFunctionReturn(PETSC_SUCCESS);
1329   } else if (iascii && size == 1) {
1330     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1331     PetscCall(MatView(aij->A, viewer));
1332     PetscFunctionReturn(PETSC_SUCCESS);
1333   } else if (isdraw) {
1334     PetscDraw draw;
1335     PetscBool isnull;
1336     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1337     PetscCall(PetscDrawIsNull(draw, &isnull));
1338     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1339   }
1340 
1341   { /* assemble the entire matrix onto first processor */
1342     Mat A = NULL, Av;
1343     IS  isrow, iscol;
1344 
1345     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1346     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1347     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1348     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1349     /*  The commented code uses MatCreateSubMatrices instead */
1350     /*
1351     Mat *AA, A = NULL, Av;
1352     IS  isrow,iscol;
1353 
1354     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1355     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1356     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1357     if (rank == 0) {
1358        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1359        A    = AA[0];
1360        Av   = AA[0];
1361     }
1362     PetscCall(MatDestroySubMatrices(1,&AA));
1363 */
1364     PetscCall(ISDestroy(&iscol));
1365     PetscCall(ISDestroy(&isrow));
1366     /*
1367        Everyone has to call to draw the matrix since the graphics waits are
1368        synchronized across all processors that share the PetscDraw object
1369     */
1370     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1371     if (rank == 0) {
1372       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1373       PetscCall(MatView_SeqAIJ(Av, sviewer));
1374     }
1375     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1376     PetscCall(MatDestroy(&A));
1377   }
1378   PetscFunctionReturn(PETSC_SUCCESS);
1379 }
1380 
1381 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1382 {
1383   PetscBool iascii, isdraw, issocket, isbinary;
1384 
1385   PetscFunctionBegin;
1386   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1387   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1388   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1389   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1390   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1391   PetscFunctionReturn(PETSC_SUCCESS);
1392 }
1393 
1394 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1395 {
1396   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1397   Vec         bb1 = NULL;
1398   PetscBool   hasop;
1399 
1400   PetscFunctionBegin;
1401   if (flag == SOR_APPLY_UPPER) {
1402     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1403     PetscFunctionReturn(PETSC_SUCCESS);
1404   }
1405 
1406   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1407 
1408   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1409     if (flag & SOR_ZERO_INITIAL_GUESS) {
1410       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1411       its--;
1412     }
1413 
1414     while (its--) {
1415       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1416       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1417 
1418       /* update rhs: bb1 = bb - B*x */
1419       PetscCall(VecScale(mat->lvec, -1.0));
1420       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1421 
1422       /* local sweep */
1423       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1424     }
1425   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1426     if (flag & SOR_ZERO_INITIAL_GUESS) {
1427       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1428       its--;
1429     }
1430     while (its--) {
1431       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1432       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1433 
1434       /* update rhs: bb1 = bb - B*x */
1435       PetscCall(VecScale(mat->lvec, -1.0));
1436       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1437 
1438       /* local sweep */
1439       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1440     }
1441   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1442     if (flag & SOR_ZERO_INITIAL_GUESS) {
1443       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1444       its--;
1445     }
1446     while (its--) {
1447       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1448       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1449 
1450       /* update rhs: bb1 = bb - B*x */
1451       PetscCall(VecScale(mat->lvec, -1.0));
1452       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1453 
1454       /* local sweep */
1455       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1456     }
1457   } else if (flag & SOR_EISENSTAT) {
1458     Vec xx1;
1459 
1460     PetscCall(VecDuplicate(bb, &xx1));
1461     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1462 
1463     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1464     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1465     if (!mat->diag) {
1466       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1467       PetscCall(MatGetDiagonal(matin, mat->diag));
1468     }
1469     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1470     if (hasop) {
1471       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1472     } else {
1473       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1474     }
1475     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1476 
1477     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1478 
1479     /* local sweep */
1480     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1481     PetscCall(VecAXPY(xx, 1.0, xx1));
1482     PetscCall(VecDestroy(&xx1));
1483   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1484 
1485   PetscCall(VecDestroy(&bb1));
1486 
1487   matin->factorerrortype = mat->A->factorerrortype;
1488   PetscFunctionReturn(PETSC_SUCCESS);
1489 }
1490 
1491 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1492 {
1493   Mat             aA, aB, Aperm;
1494   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1495   PetscScalar    *aa, *ba;
1496   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1497   PetscSF         rowsf, sf;
1498   IS              parcolp = NULL;
1499   PetscBool       done;
1500 
1501   PetscFunctionBegin;
1502   PetscCall(MatGetLocalSize(A, &m, &n));
1503   PetscCall(ISGetIndices(rowp, &rwant));
1504   PetscCall(ISGetIndices(colp, &cwant));
1505   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1506 
1507   /* Invert row permutation to find out where my rows should go */
1508   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1509   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1510   PetscCall(PetscSFSetFromOptions(rowsf));
1511   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1512   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1513   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1514 
1515   /* Invert column permutation to find out where my columns should go */
1516   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1517   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1518   PetscCall(PetscSFSetFromOptions(sf));
1519   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1520   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1521   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1522   PetscCall(PetscSFDestroy(&sf));
1523 
1524   PetscCall(ISRestoreIndices(rowp, &rwant));
1525   PetscCall(ISRestoreIndices(colp, &cwant));
1526   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1527 
1528   /* Find out where my gcols should go */
1529   PetscCall(MatGetSize(aB, NULL, &ng));
1530   PetscCall(PetscMalloc1(ng, &gcdest));
1531   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1532   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1533   PetscCall(PetscSFSetFromOptions(sf));
1534   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1535   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1536   PetscCall(PetscSFDestroy(&sf));
1537 
1538   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1539   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1540   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1541   for (i = 0; i < m; i++) {
1542     PetscInt    row = rdest[i];
1543     PetscMPIInt rowner;
1544     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1545     for (j = ai[i]; j < ai[i + 1]; j++) {
1546       PetscInt    col = cdest[aj[j]];
1547       PetscMPIInt cowner;
1548       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1549       if (rowner == cowner) dnnz[i]++;
1550       else onnz[i]++;
1551     }
1552     for (j = bi[i]; j < bi[i + 1]; j++) {
1553       PetscInt    col = gcdest[bj[j]];
1554       PetscMPIInt cowner;
1555       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1556       if (rowner == cowner) dnnz[i]++;
1557       else onnz[i]++;
1558     }
1559   }
1560   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1561   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1562   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1563   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1564   PetscCall(PetscSFDestroy(&rowsf));
1565 
1566   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1567   PetscCall(MatSeqAIJGetArray(aA, &aa));
1568   PetscCall(MatSeqAIJGetArray(aB, &ba));
1569   for (i = 0; i < m; i++) {
1570     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1571     PetscInt  j0, rowlen;
1572     rowlen = ai[i + 1] - ai[i];
1573     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1574       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1575       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1576     }
1577     rowlen = bi[i + 1] - bi[i];
1578     for (j0 = j = 0; j < rowlen; j0 = j) {
1579       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1580       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1581     }
1582   }
1583   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1584   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1585   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1586   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1587   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1588   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1589   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1590   PetscCall(PetscFree3(work, rdest, cdest));
1591   PetscCall(PetscFree(gcdest));
1592   if (parcolp) PetscCall(ISDestroy(&colp));
1593   *B = Aperm;
1594   PetscFunctionReturn(PETSC_SUCCESS);
1595 }
1596 
1597 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1598 {
1599   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1600 
1601   PetscFunctionBegin;
1602   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1603   if (ghosts) *ghosts = aij->garray;
1604   PetscFunctionReturn(PETSC_SUCCESS);
1605 }
1606 
1607 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1608 {
1609   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1610   Mat            A = mat->A, B = mat->B;
1611   PetscLogDouble isend[5], irecv[5];
1612 
1613   PetscFunctionBegin;
1614   info->block_size = 1.0;
1615   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1616 
1617   isend[0] = info->nz_used;
1618   isend[1] = info->nz_allocated;
1619   isend[2] = info->nz_unneeded;
1620   isend[3] = info->memory;
1621   isend[4] = info->mallocs;
1622 
1623   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1624 
1625   isend[0] += info->nz_used;
1626   isend[1] += info->nz_allocated;
1627   isend[2] += info->nz_unneeded;
1628   isend[3] += info->memory;
1629   isend[4] += info->mallocs;
1630   if (flag == MAT_LOCAL) {
1631     info->nz_used      = isend[0];
1632     info->nz_allocated = isend[1];
1633     info->nz_unneeded  = isend[2];
1634     info->memory       = isend[3];
1635     info->mallocs      = isend[4];
1636   } else if (flag == MAT_GLOBAL_MAX) {
1637     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1638 
1639     info->nz_used      = irecv[0];
1640     info->nz_allocated = irecv[1];
1641     info->nz_unneeded  = irecv[2];
1642     info->memory       = irecv[3];
1643     info->mallocs      = irecv[4];
1644   } else if (flag == MAT_GLOBAL_SUM) {
1645     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1646 
1647     info->nz_used      = irecv[0];
1648     info->nz_allocated = irecv[1];
1649     info->nz_unneeded  = irecv[2];
1650     info->memory       = irecv[3];
1651     info->mallocs      = irecv[4];
1652   }
1653   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1654   info->fill_ratio_needed = 0;
1655   info->factor_mallocs    = 0;
1656   PetscFunctionReturn(PETSC_SUCCESS);
1657 }
1658 
1659 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1660 {
1661   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1662 
1663   PetscFunctionBegin;
1664   switch (op) {
1665   case MAT_NEW_NONZERO_LOCATIONS:
1666   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1667   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1668   case MAT_KEEP_NONZERO_PATTERN:
1669   case MAT_NEW_NONZERO_LOCATION_ERR:
1670   case MAT_USE_INODES:
1671   case MAT_IGNORE_ZERO_ENTRIES:
1672   case MAT_FORM_EXPLICIT_TRANSPOSE:
1673     MatCheckPreallocated(A, 1);
1674     PetscCall(MatSetOption(a->A, op, flg));
1675     PetscCall(MatSetOption(a->B, op, flg));
1676     break;
1677   case MAT_ROW_ORIENTED:
1678     MatCheckPreallocated(A, 1);
1679     a->roworiented = flg;
1680 
1681     PetscCall(MatSetOption(a->A, op, flg));
1682     PetscCall(MatSetOption(a->B, op, flg));
1683     break;
1684   case MAT_FORCE_DIAGONAL_ENTRIES:
1685   case MAT_SORTED_FULL:
1686     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1687     break;
1688   case MAT_IGNORE_OFF_PROC_ENTRIES:
1689     a->donotstash = flg;
1690     break;
1691   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1692   case MAT_SPD:
1693   case MAT_SYMMETRIC:
1694   case MAT_STRUCTURALLY_SYMMETRIC:
1695   case MAT_HERMITIAN:
1696   case MAT_SYMMETRY_ETERNAL:
1697   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1698   case MAT_SPD_ETERNAL:
1699     /* if the diagonal matrix is square it inherits some of the properties above */
1700     break;
1701   case MAT_SUBMAT_SINGLEIS:
1702     A->submat_singleis = flg;
1703     break;
1704   case MAT_STRUCTURE_ONLY:
1705     /* The option is handled directly by MatSetOption() */
1706     break;
1707   default:
1708     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1709   }
1710   PetscFunctionReturn(PETSC_SUCCESS);
1711 }
1712 
1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1714 {
1715   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1716   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1717   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1718   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1719   PetscInt    *cmap, *idx_p;
1720 
1721   PetscFunctionBegin;
1722   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1723   mat->getrowactive = PETSC_TRUE;
1724 
1725   if (!mat->rowvalues && (idx || v)) {
1726     /*
1727         allocate enough space to hold information from the longest row.
1728     */
1729     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1730     PetscInt    max = 1, tmp;
1731     for (i = 0; i < matin->rmap->n; i++) {
1732       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1733       if (max < tmp) max = tmp;
1734     }
1735     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1736   }
1737 
1738   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1739   lrow = row - rstart;
1740 
1741   pvA = &vworkA;
1742   pcA = &cworkA;
1743   pvB = &vworkB;
1744   pcB = &cworkB;
1745   if (!v) {
1746     pvA = NULL;
1747     pvB = NULL;
1748   }
1749   if (!idx) {
1750     pcA = NULL;
1751     if (!v) pcB = NULL;
1752   }
1753   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1754   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1755   nztot = nzA + nzB;
1756 
1757   cmap = mat->garray;
1758   if (v || idx) {
1759     if (nztot) {
1760       /* Sort by increasing column numbers, assuming A and B already sorted */
1761       PetscInt imark = -1;
1762       if (v) {
1763         *v = v_p = mat->rowvalues;
1764         for (i = 0; i < nzB; i++) {
1765           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1766           else break;
1767         }
1768         imark = i;
1769         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1770         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1771       }
1772       if (idx) {
1773         *idx = idx_p = mat->rowindices;
1774         if (imark > -1) {
1775           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1776         } else {
1777           for (i = 0; i < nzB; i++) {
1778             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1779             else break;
1780           }
1781           imark = i;
1782         }
1783         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1784         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1785       }
1786     } else {
1787       if (idx) *idx = NULL;
1788       if (v) *v = NULL;
1789     }
1790   }
1791   *nz = nztot;
1792   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1793   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1798 {
1799   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1800 
1801   PetscFunctionBegin;
1802   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1803   aij->getrowactive = PETSC_FALSE;
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1808 {
1809   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1810   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1811   PetscInt         i, j, cstart = mat->cmap->rstart;
1812   PetscReal        sum = 0.0;
1813   const MatScalar *v, *amata, *bmata;
1814 
1815   PetscFunctionBegin;
1816   if (aij->size == 1) {
1817     PetscCall(MatNorm(aij->A, type, norm));
1818   } else {
1819     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1820     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1821     if (type == NORM_FROBENIUS) {
1822       v = amata;
1823       for (i = 0; i < amat->nz; i++) {
1824         sum += PetscRealPart(PetscConj(*v) * (*v));
1825         v++;
1826       }
1827       v = bmata;
1828       for (i = 0; i < bmat->nz; i++) {
1829         sum += PetscRealPart(PetscConj(*v) * (*v));
1830         v++;
1831       }
1832       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1833       *norm = PetscSqrtReal(*norm);
1834       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1835     } else if (type == NORM_1) { /* max column norm */
1836       PetscReal *tmp, *tmp2;
1837       PetscInt  *jj, *garray = aij->garray;
1838       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1839       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1840       *norm = 0.0;
1841       v     = amata;
1842       jj    = amat->j;
1843       for (j = 0; j < amat->nz; j++) {
1844         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1845         v++;
1846       }
1847       v  = bmata;
1848       jj = bmat->j;
1849       for (j = 0; j < bmat->nz; j++) {
1850         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1851         v++;
1852       }
1853       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1854       for (j = 0; j < mat->cmap->N; j++) {
1855         if (tmp2[j] > *norm) *norm = tmp2[j];
1856       }
1857       PetscCall(PetscFree(tmp));
1858       PetscCall(PetscFree(tmp2));
1859       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1860     } else if (type == NORM_INFINITY) { /* max row norm */
1861       PetscReal ntemp = 0.0;
1862       for (j = 0; j < aij->A->rmap->n; j++) {
1863         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1864         sum = 0.0;
1865         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1866           sum += PetscAbsScalar(*v);
1867           v++;
1868         }
1869         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1870         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1871           sum += PetscAbsScalar(*v);
1872           v++;
1873         }
1874         if (sum > ntemp) ntemp = sum;
1875       }
1876       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1877       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1878     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1879     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1880     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1881   }
1882   PetscFunctionReturn(PETSC_SUCCESS);
1883 }
1884 
1885 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1886 {
1887   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1888   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1889   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1890   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1891   Mat              B, A_diag, *B_diag;
1892   const MatScalar *pbv, *bv;
1893 
1894   PetscFunctionBegin;
1895   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1896   ma = A->rmap->n;
1897   na = A->cmap->n;
1898   mb = a->B->rmap->n;
1899   nb = a->B->cmap->n;
1900   ai = Aloc->i;
1901   aj = Aloc->j;
1902   bi = Bloc->i;
1903   bj = Bloc->j;
1904   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1905     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1906     PetscSFNode         *oloc;
1907     PETSC_UNUSED PetscSF sf;
1908 
1909     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1910     /* compute d_nnz for preallocation */
1911     PetscCall(PetscArrayzero(d_nnz, na));
1912     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1913     /* compute local off-diagonal contributions */
1914     PetscCall(PetscArrayzero(g_nnz, nb));
1915     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1916     /* map those to global */
1917     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1918     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1919     PetscCall(PetscSFSetFromOptions(sf));
1920     PetscCall(PetscArrayzero(o_nnz, na));
1921     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1922     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1923     PetscCall(PetscSFDestroy(&sf));
1924 
1925     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1926     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1927     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1928     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1929     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1930     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1931   } else {
1932     B = *matout;
1933     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1934   }
1935 
1936   b           = (Mat_MPIAIJ *)B->data;
1937   A_diag      = a->A;
1938   B_diag      = &b->A;
1939   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1940   A_diag_ncol = A_diag->cmap->N;
1941   B_diag_ilen = sub_B_diag->ilen;
1942   B_diag_i    = sub_B_diag->i;
1943 
1944   /* Set ilen for diagonal of B */
1945   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1946 
1947   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1948   very quickly (=without using MatSetValues), because all writes are local. */
1949   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1950   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1951 
1952   /* copy over the B part */
1953   PetscCall(PetscMalloc1(bi[mb], &cols));
1954   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1955   pbv = bv;
1956   row = A->rmap->rstart;
1957   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1958   cols_tmp = cols;
1959   for (i = 0; i < mb; i++) {
1960     ncol = bi[i + 1] - bi[i];
1961     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1962     row++;
1963     if (pbv) pbv += ncol;
1964     if (cols_tmp) cols_tmp += ncol;
1965   }
1966   PetscCall(PetscFree(cols));
1967   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1968 
1969   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1970   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1971   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1972     *matout = B;
1973   } else {
1974     PetscCall(MatHeaderMerge(A, &B));
1975   }
1976   PetscFunctionReturn(PETSC_SUCCESS);
1977 }
1978 
1979 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1980 {
1981   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1982   Mat         a = aij->A, b = aij->B;
1983   PetscInt    s1, s2, s3;
1984 
1985   PetscFunctionBegin;
1986   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1987   if (rr) {
1988     PetscCall(VecGetLocalSize(rr, &s1));
1989     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1990     /* Overlap communication with computation. */
1991     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1992   }
1993   if (ll) {
1994     PetscCall(VecGetLocalSize(ll, &s1));
1995     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1996     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1997   }
1998   /* scale  the diagonal block */
1999   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2000 
2001   if (rr) {
2002     /* Do a scatter end and then right scale the off-diagonal block */
2003     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2004     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2005   }
2006   PetscFunctionReturn(PETSC_SUCCESS);
2007 }
2008 
2009 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2010 {
2011   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2012 
2013   PetscFunctionBegin;
2014   PetscCall(MatSetUnfactored(a->A));
2015   PetscFunctionReturn(PETSC_SUCCESS);
2016 }
2017 
2018 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2019 {
2020   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2021   Mat         a, b, c, d;
2022   PetscBool   flg;
2023 
2024   PetscFunctionBegin;
2025   a = matA->A;
2026   b = matA->B;
2027   c = matB->A;
2028   d = matB->B;
2029 
2030   PetscCall(MatEqual(a, c, &flg));
2031   if (flg) PetscCall(MatEqual(b, d, &flg));
2032   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2033   PetscFunctionReturn(PETSC_SUCCESS);
2034 }
2035 
2036 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2037 {
2038   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2039   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2040 
2041   PetscFunctionBegin;
2042   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2043   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2044     /* because of the column compression in the off-processor part of the matrix a->B,
2045        the number of columns in a->B and b->B may be different, hence we cannot call
2046        the MatCopy() directly on the two parts. If need be, we can provide a more
2047        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2048        then copying the submatrices */
2049     PetscCall(MatCopy_Basic(A, B, str));
2050   } else {
2051     PetscCall(MatCopy(a->A, b->A, str));
2052     PetscCall(MatCopy(a->B, b->B, str));
2053   }
2054   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2055   PetscFunctionReturn(PETSC_SUCCESS);
2056 }
2057 
2058 /*
2059    Computes the number of nonzeros per row needed for preallocation when X and Y
2060    have different nonzero structure.
2061 */
2062 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2063 {
2064   PetscInt i, j, k, nzx, nzy;
2065 
2066   PetscFunctionBegin;
2067   /* Set the number of nonzeros in the new matrix */
2068   for (i = 0; i < m; i++) {
2069     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2070     nzx    = xi[i + 1] - xi[i];
2071     nzy    = yi[i + 1] - yi[i];
2072     nnz[i] = 0;
2073     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2074       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2075       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2076       nnz[i]++;
2077     }
2078     for (; k < nzy; k++) nnz[i]++;
2079   }
2080   PetscFunctionReturn(PETSC_SUCCESS);
2081 }
2082 
2083 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2084 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2085 {
2086   PetscInt    m = Y->rmap->N;
2087   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2088   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2089 
2090   PetscFunctionBegin;
2091   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2092   PetscFunctionReturn(PETSC_SUCCESS);
2093 }
2094 
2095 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2096 {
2097   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2098 
2099   PetscFunctionBegin;
2100   if (str == SAME_NONZERO_PATTERN) {
2101     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2102     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2103   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2104     PetscCall(MatAXPY_Basic(Y, a, X, str));
2105   } else {
2106     Mat       B;
2107     PetscInt *nnz_d, *nnz_o;
2108 
2109     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2110     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2111     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2112     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2113     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2114     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2115     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2116     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2117     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2118     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2119     PetscCall(MatHeaderMerge(Y, &B));
2120     PetscCall(PetscFree(nnz_d));
2121     PetscCall(PetscFree(nnz_o));
2122   }
2123   PetscFunctionReturn(PETSC_SUCCESS);
2124 }
2125 
2126 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2127 
2128 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2129 {
2130   PetscFunctionBegin;
2131   if (PetscDefined(USE_COMPLEX)) {
2132     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2133 
2134     PetscCall(MatConjugate_SeqAIJ(aij->A));
2135     PetscCall(MatConjugate_SeqAIJ(aij->B));
2136   }
2137   PetscFunctionReturn(PETSC_SUCCESS);
2138 }
2139 
2140 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2141 {
2142   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2143 
2144   PetscFunctionBegin;
2145   PetscCall(MatRealPart(a->A));
2146   PetscCall(MatRealPart(a->B));
2147   PetscFunctionReturn(PETSC_SUCCESS);
2148 }
2149 
2150 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2151 {
2152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(MatImaginaryPart(a->A));
2156   PetscCall(MatImaginaryPart(a->B));
2157   PetscFunctionReturn(PETSC_SUCCESS);
2158 }
2159 
2160 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2161 {
2162   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2163   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2164   PetscScalar       *va, *vv;
2165   Vec                vB, vA;
2166   const PetscScalar *vb;
2167 
2168   PetscFunctionBegin;
2169   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2170   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2171 
2172   PetscCall(VecGetArrayWrite(vA, &va));
2173   if (idx) {
2174     for (i = 0; i < m; i++) {
2175       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2176     }
2177   }
2178 
2179   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2180   PetscCall(PetscMalloc1(m, &idxb));
2181   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2182 
2183   PetscCall(VecGetArrayWrite(v, &vv));
2184   PetscCall(VecGetArrayRead(vB, &vb));
2185   for (i = 0; i < m; i++) {
2186     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2187       vv[i] = vb[i];
2188       if (idx) idx[i] = a->garray[idxb[i]];
2189     } else {
2190       vv[i] = va[i];
2191       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2192     }
2193   }
2194   PetscCall(VecRestoreArrayWrite(vA, &vv));
2195   PetscCall(VecRestoreArrayWrite(vA, &va));
2196   PetscCall(VecRestoreArrayRead(vB, &vb));
2197   PetscCall(PetscFree(idxb));
2198   PetscCall(VecDestroy(&vA));
2199   PetscCall(VecDestroy(&vB));
2200   PetscFunctionReturn(PETSC_SUCCESS);
2201 }
2202 
2203 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2204 {
2205   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2206   PetscInt           m = A->rmap->n, n = A->cmap->n;
2207   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2208   PetscInt          *cmap = mat->garray;
2209   PetscInt          *diagIdx, *offdiagIdx;
2210   Vec                diagV, offdiagV;
2211   PetscScalar       *a, *diagA, *offdiagA;
2212   const PetscScalar *ba, *bav;
2213   PetscInt           r, j, col, ncols, *bi, *bj;
2214   Mat                B = mat->B;
2215   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2216 
2217   PetscFunctionBegin;
2218   /* When a process holds entire A and other processes have no entry */
2219   if (A->cmap->N == n) {
2220     PetscCall(VecGetArrayWrite(v, &diagA));
2221     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2222     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2223     PetscCall(VecDestroy(&diagV));
2224     PetscCall(VecRestoreArrayWrite(v, &diagA));
2225     PetscFunctionReturn(PETSC_SUCCESS);
2226   } else if (n == 0) {
2227     if (m) {
2228       PetscCall(VecGetArrayWrite(v, &a));
2229       for (r = 0; r < m; r++) {
2230         a[r] = 0.0;
2231         if (idx) idx[r] = -1;
2232       }
2233       PetscCall(VecRestoreArrayWrite(v, &a));
2234     }
2235     PetscFunctionReturn(PETSC_SUCCESS);
2236   }
2237 
2238   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2239   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2240   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2241   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2242 
2243   /* Get offdiagIdx[] for implicit 0.0 */
2244   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2245   ba = bav;
2246   bi = b->i;
2247   bj = b->j;
2248   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2249   for (r = 0; r < m; r++) {
2250     ncols = bi[r + 1] - bi[r];
2251     if (ncols == A->cmap->N - n) { /* Brow is dense */
2252       offdiagA[r]   = *ba;
2253       offdiagIdx[r] = cmap[0];
2254     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2255       offdiagA[r] = 0.0;
2256 
2257       /* Find first hole in the cmap */
2258       for (j = 0; j < ncols; j++) {
2259         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2260         if (col > j && j < cstart) {
2261           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2262           break;
2263         } else if (col > j + n && j >= cstart) {
2264           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2265           break;
2266         }
2267       }
2268       if (j == ncols && ncols < A->cmap->N - n) {
2269         /* a hole is outside compressed Bcols */
2270         if (ncols == 0) {
2271           if (cstart) {
2272             offdiagIdx[r] = 0;
2273           } else offdiagIdx[r] = cend;
2274         } else { /* ncols > 0 */
2275           offdiagIdx[r] = cmap[ncols - 1] + 1;
2276           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2277         }
2278       }
2279     }
2280 
2281     for (j = 0; j < ncols; j++) {
2282       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2283         offdiagA[r]   = *ba;
2284         offdiagIdx[r] = cmap[*bj];
2285       }
2286       ba++;
2287       bj++;
2288     }
2289   }
2290 
2291   PetscCall(VecGetArrayWrite(v, &a));
2292   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2293   for (r = 0; r < m; ++r) {
2294     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2295       a[r] = diagA[r];
2296       if (idx) idx[r] = cstart + diagIdx[r];
2297     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2298       a[r] = diagA[r];
2299       if (idx) {
2300         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2301           idx[r] = cstart + diagIdx[r];
2302         } else idx[r] = offdiagIdx[r];
2303       }
2304     } else {
2305       a[r] = offdiagA[r];
2306       if (idx) idx[r] = offdiagIdx[r];
2307     }
2308   }
2309   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2310   PetscCall(VecRestoreArrayWrite(v, &a));
2311   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2312   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2313   PetscCall(VecDestroy(&diagV));
2314   PetscCall(VecDestroy(&offdiagV));
2315   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2316   PetscFunctionReturn(PETSC_SUCCESS);
2317 }
2318 
2319 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2320 {
2321   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2322   PetscInt           m = A->rmap->n, n = A->cmap->n;
2323   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2324   PetscInt          *cmap = mat->garray;
2325   PetscInt          *diagIdx, *offdiagIdx;
2326   Vec                diagV, offdiagV;
2327   PetscScalar       *a, *diagA, *offdiagA;
2328   const PetscScalar *ba, *bav;
2329   PetscInt           r, j, col, ncols, *bi, *bj;
2330   Mat                B = mat->B;
2331   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2332 
2333   PetscFunctionBegin;
2334   /* When a process holds entire A and other processes have no entry */
2335   if (A->cmap->N == n) {
2336     PetscCall(VecGetArrayWrite(v, &diagA));
2337     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2338     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2339     PetscCall(VecDestroy(&diagV));
2340     PetscCall(VecRestoreArrayWrite(v, &diagA));
2341     PetscFunctionReturn(PETSC_SUCCESS);
2342   } else if (n == 0) {
2343     if (m) {
2344       PetscCall(VecGetArrayWrite(v, &a));
2345       for (r = 0; r < m; r++) {
2346         a[r] = PETSC_MAX_REAL;
2347         if (idx) idx[r] = -1;
2348       }
2349       PetscCall(VecRestoreArrayWrite(v, &a));
2350     }
2351     PetscFunctionReturn(PETSC_SUCCESS);
2352   }
2353 
2354   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2355   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2356   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2357   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2358 
2359   /* Get offdiagIdx[] for implicit 0.0 */
2360   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2361   ba = bav;
2362   bi = b->i;
2363   bj = b->j;
2364   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2365   for (r = 0; r < m; r++) {
2366     ncols = bi[r + 1] - bi[r];
2367     if (ncols == A->cmap->N - n) { /* Brow is dense */
2368       offdiagA[r]   = *ba;
2369       offdiagIdx[r] = cmap[0];
2370     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2371       offdiagA[r] = 0.0;
2372 
2373       /* Find first hole in the cmap */
2374       for (j = 0; j < ncols; j++) {
2375         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2376         if (col > j && j < cstart) {
2377           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2378           break;
2379         } else if (col > j + n && j >= cstart) {
2380           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2381           break;
2382         }
2383       }
2384       if (j == ncols && ncols < A->cmap->N - n) {
2385         /* a hole is outside compressed Bcols */
2386         if (ncols == 0) {
2387           if (cstart) {
2388             offdiagIdx[r] = 0;
2389           } else offdiagIdx[r] = cend;
2390         } else { /* ncols > 0 */
2391           offdiagIdx[r] = cmap[ncols - 1] + 1;
2392           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2393         }
2394       }
2395     }
2396 
2397     for (j = 0; j < ncols; j++) {
2398       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2399         offdiagA[r]   = *ba;
2400         offdiagIdx[r] = cmap[*bj];
2401       }
2402       ba++;
2403       bj++;
2404     }
2405   }
2406 
2407   PetscCall(VecGetArrayWrite(v, &a));
2408   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2409   for (r = 0; r < m; ++r) {
2410     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2411       a[r] = diagA[r];
2412       if (idx) idx[r] = cstart + diagIdx[r];
2413     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2414       a[r] = diagA[r];
2415       if (idx) {
2416         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2417           idx[r] = cstart + diagIdx[r];
2418         } else idx[r] = offdiagIdx[r];
2419       }
2420     } else {
2421       a[r] = offdiagA[r];
2422       if (idx) idx[r] = offdiagIdx[r];
2423     }
2424   }
2425   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2426   PetscCall(VecRestoreArrayWrite(v, &a));
2427   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2428   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2429   PetscCall(VecDestroy(&diagV));
2430   PetscCall(VecDestroy(&offdiagV));
2431   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2432   PetscFunctionReturn(PETSC_SUCCESS);
2433 }
2434 
2435 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2436 {
2437   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2438   PetscInt           m = A->rmap->n, n = A->cmap->n;
2439   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2440   PetscInt          *cmap = mat->garray;
2441   PetscInt          *diagIdx, *offdiagIdx;
2442   Vec                diagV, offdiagV;
2443   PetscScalar       *a, *diagA, *offdiagA;
2444   const PetscScalar *ba, *bav;
2445   PetscInt           r, j, col, ncols, *bi, *bj;
2446   Mat                B = mat->B;
2447   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2448 
2449   PetscFunctionBegin;
2450   /* When a process holds entire A and other processes have no entry */
2451   if (A->cmap->N == n) {
2452     PetscCall(VecGetArrayWrite(v, &diagA));
2453     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2454     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2455     PetscCall(VecDestroy(&diagV));
2456     PetscCall(VecRestoreArrayWrite(v, &diagA));
2457     PetscFunctionReturn(PETSC_SUCCESS);
2458   } else if (n == 0) {
2459     if (m) {
2460       PetscCall(VecGetArrayWrite(v, &a));
2461       for (r = 0; r < m; r++) {
2462         a[r] = PETSC_MIN_REAL;
2463         if (idx) idx[r] = -1;
2464       }
2465       PetscCall(VecRestoreArrayWrite(v, &a));
2466     }
2467     PetscFunctionReturn(PETSC_SUCCESS);
2468   }
2469 
2470   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2471   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2472   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2473   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2474 
2475   /* Get offdiagIdx[] for implicit 0.0 */
2476   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2477   ba = bav;
2478   bi = b->i;
2479   bj = b->j;
2480   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2481   for (r = 0; r < m; r++) {
2482     ncols = bi[r + 1] - bi[r];
2483     if (ncols == A->cmap->N - n) { /* Brow is dense */
2484       offdiagA[r]   = *ba;
2485       offdiagIdx[r] = cmap[0];
2486     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2487       offdiagA[r] = 0.0;
2488 
2489       /* Find first hole in the cmap */
2490       for (j = 0; j < ncols; j++) {
2491         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2492         if (col > j && j < cstart) {
2493           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2494           break;
2495         } else if (col > j + n && j >= cstart) {
2496           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2497           break;
2498         }
2499       }
2500       if (j == ncols && ncols < A->cmap->N - n) {
2501         /* a hole is outside compressed Bcols */
2502         if (ncols == 0) {
2503           if (cstart) {
2504             offdiagIdx[r] = 0;
2505           } else offdiagIdx[r] = cend;
2506         } else { /* ncols > 0 */
2507           offdiagIdx[r] = cmap[ncols - 1] + 1;
2508           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2509         }
2510       }
2511     }
2512 
2513     for (j = 0; j < ncols; j++) {
2514       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2515         offdiagA[r]   = *ba;
2516         offdiagIdx[r] = cmap[*bj];
2517       }
2518       ba++;
2519       bj++;
2520     }
2521   }
2522 
2523   PetscCall(VecGetArrayWrite(v, &a));
2524   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2525   for (r = 0; r < m; ++r) {
2526     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2527       a[r] = diagA[r];
2528       if (idx) idx[r] = cstart + diagIdx[r];
2529     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2530       a[r] = diagA[r];
2531       if (idx) {
2532         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2533           idx[r] = cstart + diagIdx[r];
2534         } else idx[r] = offdiagIdx[r];
2535       }
2536     } else {
2537       a[r] = offdiagA[r];
2538       if (idx) idx[r] = offdiagIdx[r];
2539     }
2540   }
2541   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2542   PetscCall(VecRestoreArrayWrite(v, &a));
2543   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2544   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2545   PetscCall(VecDestroy(&diagV));
2546   PetscCall(VecDestroy(&offdiagV));
2547   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2548   PetscFunctionReturn(PETSC_SUCCESS);
2549 }
2550 
2551 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2552 {
2553   Mat *dummy;
2554 
2555   PetscFunctionBegin;
2556   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2557   *newmat = *dummy;
2558   PetscCall(PetscFree(dummy));
2559   PetscFunctionReturn(PETSC_SUCCESS);
2560 }
2561 
2562 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2563 {
2564   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2565 
2566   PetscFunctionBegin;
2567   PetscCall(MatInvertBlockDiagonal(a->A, values));
2568   A->factorerrortype = a->A->factorerrortype;
2569   PetscFunctionReturn(PETSC_SUCCESS);
2570 }
2571 
2572 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2573 {
2574   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2575 
2576   PetscFunctionBegin;
2577   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2578   PetscCall(MatSetRandom(aij->A, rctx));
2579   if (x->assembled) {
2580     PetscCall(MatSetRandom(aij->B, rctx));
2581   } else {
2582     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2583   }
2584   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2585   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2586   PetscFunctionReturn(PETSC_SUCCESS);
2587 }
2588 
2589 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2590 {
2591   PetscFunctionBegin;
2592   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2593   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2594   PetscFunctionReturn(PETSC_SUCCESS);
2595 }
2596 
2597 /*@
2598   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2599 
2600   Not Collective
2601 
2602   Input Parameter:
2603 . A - the matrix
2604 
2605   Output Parameter:
2606 . nz - the number of nonzeros
2607 
2608   Level: advanced
2609 
2610 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2611 @*/
2612 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2613 {
2614   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2615   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2616   PetscBool   isaij;
2617 
2618   PetscFunctionBegin;
2619   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2620   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2621   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2622   PetscFunctionReturn(PETSC_SUCCESS);
2623 }
2624 
2625 /*@
2626   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2627 
2628   Collective
2629 
2630   Input Parameters:
2631 + A  - the matrix
2632 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2633 
2634   Level: advanced
2635 
2636 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2637 @*/
2638 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2639 {
2640   PetscFunctionBegin;
2641   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2642   PetscFunctionReturn(PETSC_SUCCESS);
2643 }
2644 
2645 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2646 {
2647   PetscBool sc = PETSC_FALSE, flg;
2648 
2649   PetscFunctionBegin;
2650   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2651   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2652   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2653   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2654   PetscOptionsHeadEnd();
2655   PetscFunctionReturn(PETSC_SUCCESS);
2656 }
2657 
2658 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2659 {
2660   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2661   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2662 
2663   PetscFunctionBegin;
2664   if (!Y->preallocated) {
2665     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2666   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2667     PetscInt nonew = aij->nonew;
2668     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2669     aij->nonew = nonew;
2670   }
2671   PetscCall(MatShift_Basic(Y, a));
2672   PetscFunctionReturn(PETSC_SUCCESS);
2673 }
2674 
2675 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2676 {
2677   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2678 
2679   PetscFunctionBegin;
2680   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2681   PetscCall(MatMissingDiagonal(a->A, missing, d));
2682   if (d) {
2683     PetscInt rstart;
2684     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2685     *d += rstart;
2686   }
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2696   PetscFunctionReturn(PETSC_SUCCESS);
2697 }
2698 
2699 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2700 {
2701   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2702 
2703   PetscFunctionBegin;
2704   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2705   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2706   PetscFunctionReturn(PETSC_SUCCESS);
2707 }
2708 
2709 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2710                                        MatGetRow_MPIAIJ,
2711                                        MatRestoreRow_MPIAIJ,
2712                                        MatMult_MPIAIJ,
2713                                        /* 4*/ MatMultAdd_MPIAIJ,
2714                                        MatMultTranspose_MPIAIJ,
2715                                        MatMultTransposeAdd_MPIAIJ,
2716                                        NULL,
2717                                        NULL,
2718                                        NULL,
2719                                        /*10*/ NULL,
2720                                        NULL,
2721                                        NULL,
2722                                        MatSOR_MPIAIJ,
2723                                        MatTranspose_MPIAIJ,
2724                                        /*15*/ MatGetInfo_MPIAIJ,
2725                                        MatEqual_MPIAIJ,
2726                                        MatGetDiagonal_MPIAIJ,
2727                                        MatDiagonalScale_MPIAIJ,
2728                                        MatNorm_MPIAIJ,
2729                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2730                                        MatAssemblyEnd_MPIAIJ,
2731                                        MatSetOption_MPIAIJ,
2732                                        MatZeroEntries_MPIAIJ,
2733                                        /*24*/ MatZeroRows_MPIAIJ,
2734                                        NULL,
2735                                        NULL,
2736                                        NULL,
2737                                        NULL,
2738                                        /*29*/ MatSetUp_MPI_Hash,
2739                                        NULL,
2740                                        NULL,
2741                                        MatGetDiagonalBlock_MPIAIJ,
2742                                        NULL,
2743                                        /*34*/ MatDuplicate_MPIAIJ,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        NULL,
2748                                        /*39*/ MatAXPY_MPIAIJ,
2749                                        MatCreateSubMatrices_MPIAIJ,
2750                                        MatIncreaseOverlap_MPIAIJ,
2751                                        MatGetValues_MPIAIJ,
2752                                        MatCopy_MPIAIJ,
2753                                        /*44*/ MatGetRowMax_MPIAIJ,
2754                                        MatScale_MPIAIJ,
2755                                        MatShift_MPIAIJ,
2756                                        MatDiagonalSet_MPIAIJ,
2757                                        MatZeroRowsColumns_MPIAIJ,
2758                                        /*49*/ MatSetRandom_MPIAIJ,
2759                                        MatGetRowIJ_MPIAIJ,
2760                                        MatRestoreRowIJ_MPIAIJ,
2761                                        NULL,
2762                                        NULL,
2763                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2764                                        NULL,
2765                                        MatSetUnfactored_MPIAIJ,
2766                                        MatPermute_MPIAIJ,
2767                                        NULL,
2768                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2769                                        MatDestroy_MPIAIJ,
2770                                        MatView_MPIAIJ,
2771                                        NULL,
2772                                        NULL,
2773                                        /*64*/ NULL,
2774                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2775                                        NULL,
2776                                        NULL,
2777                                        NULL,
2778                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2779                                        MatGetRowMinAbs_MPIAIJ,
2780                                        NULL,
2781                                        NULL,
2782                                        NULL,
2783                                        NULL,
2784                                        /*75*/ MatFDColoringApply_AIJ,
2785                                        MatSetFromOptions_MPIAIJ,
2786                                        NULL,
2787                                        NULL,
2788                                        MatFindZeroDiagonals_MPIAIJ,
2789                                        /*80*/ NULL,
2790                                        NULL,
2791                                        NULL,
2792                                        /*83*/ MatLoad_MPIAIJ,
2793                                        MatIsSymmetric_MPIAIJ,
2794                                        NULL,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                        /*89*/ NULL,
2799                                        NULL,
2800                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2804                                        NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        MatBindToCPU_MPIAIJ,
2808                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2809                                        NULL,
2810                                        NULL,
2811                                        MatConjugate_MPIAIJ,
2812                                        NULL,
2813                                        /*104*/ MatSetValuesRow_MPIAIJ,
2814                                        MatRealPart_MPIAIJ,
2815                                        MatImaginaryPart_MPIAIJ,
2816                                        NULL,
2817                                        NULL,
2818                                        /*109*/ NULL,
2819                                        NULL,
2820                                        MatGetRowMin_MPIAIJ,
2821                                        NULL,
2822                                        MatMissingDiagonal_MPIAIJ,
2823                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2824                                        NULL,
2825                                        MatGetGhosts_MPIAIJ,
2826                                        NULL,
2827                                        NULL,
2828                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2829                                        NULL,
2830                                        NULL,
2831                                        NULL,
2832                                        MatGetMultiProcBlock_MPIAIJ,
2833                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2834                                        MatGetColumnReductions_MPIAIJ,
2835                                        MatInvertBlockDiagonal_MPIAIJ,
2836                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2837                                        MatCreateSubMatricesMPI_MPIAIJ,
2838                                        /*129*/ NULL,
2839                                        NULL,
2840                                        NULL,
2841                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2842                                        NULL,
2843                                        /*134*/ NULL,
2844                                        NULL,
2845                                        NULL,
2846                                        NULL,
2847                                        NULL,
2848                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2849                                        NULL,
2850                                        NULL,
2851                                        MatFDColoringSetUp_MPIXAIJ,
2852                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2853                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2854                                        /*145*/ NULL,
2855                                        NULL,
2856                                        NULL,
2857                                        MatCreateGraph_Simple_AIJ,
2858                                        NULL,
2859                                        /*150*/ NULL,
2860                                        MatEliminateZeros_MPIAIJ};
2861 
2862 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2863 {
2864   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2865 
2866   PetscFunctionBegin;
2867   PetscCall(MatStoreValues(aij->A));
2868   PetscCall(MatStoreValues(aij->B));
2869   PetscFunctionReturn(PETSC_SUCCESS);
2870 }
2871 
2872 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2873 {
2874   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2875 
2876   PetscFunctionBegin;
2877   PetscCall(MatRetrieveValues(aij->A));
2878   PetscCall(MatRetrieveValues(aij->B));
2879   PetscFunctionReturn(PETSC_SUCCESS);
2880 }
2881 
2882 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2883 {
2884   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2885   PetscMPIInt size;
2886 
2887   PetscFunctionBegin;
2888   if (B->hash_active) {
2889     B->ops[0]      = b->cops;
2890     B->hash_active = PETSC_FALSE;
2891   }
2892   PetscCall(PetscLayoutSetUp(B->rmap));
2893   PetscCall(PetscLayoutSetUp(B->cmap));
2894 
2895 #if defined(PETSC_USE_CTABLE)
2896   PetscCall(PetscHMapIDestroy(&b->colmap));
2897 #else
2898   PetscCall(PetscFree(b->colmap));
2899 #endif
2900   PetscCall(PetscFree(b->garray));
2901   PetscCall(VecDestroy(&b->lvec));
2902   PetscCall(VecScatterDestroy(&b->Mvctx));
2903 
2904   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2905   PetscCall(MatDestroy(&b->B));
2906   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2907   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2908   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2909   PetscCall(MatSetType(b->B, MATSEQAIJ));
2910 
2911   PetscCall(MatDestroy(&b->A));
2912   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2913   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2914   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2915   PetscCall(MatSetType(b->A, MATSEQAIJ));
2916 
2917   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2918   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2919   B->preallocated  = PETSC_TRUE;
2920   B->was_assembled = PETSC_FALSE;
2921   B->assembled     = PETSC_FALSE;
2922   PetscFunctionReturn(PETSC_SUCCESS);
2923 }
2924 
2925 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2926 {
2927   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2928 
2929   PetscFunctionBegin;
2930   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2931   PetscCall(PetscLayoutSetUp(B->rmap));
2932   PetscCall(PetscLayoutSetUp(B->cmap));
2933 
2934 #if defined(PETSC_USE_CTABLE)
2935   PetscCall(PetscHMapIDestroy(&b->colmap));
2936 #else
2937   PetscCall(PetscFree(b->colmap));
2938 #endif
2939   PetscCall(PetscFree(b->garray));
2940   PetscCall(VecDestroy(&b->lvec));
2941   PetscCall(VecScatterDestroy(&b->Mvctx));
2942 
2943   PetscCall(MatResetPreallocation(b->A));
2944   PetscCall(MatResetPreallocation(b->B));
2945   B->preallocated  = PETSC_TRUE;
2946   B->was_assembled = PETSC_FALSE;
2947   B->assembled     = PETSC_FALSE;
2948   PetscFunctionReturn(PETSC_SUCCESS);
2949 }
2950 
2951 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2952 {
2953   Mat         mat;
2954   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2955 
2956   PetscFunctionBegin;
2957   *newmat = NULL;
2958   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2959   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2960   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2961   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2962   a = (Mat_MPIAIJ *)mat->data;
2963 
2964   mat->factortype = matin->factortype;
2965   mat->assembled  = matin->assembled;
2966   mat->insertmode = NOT_SET_VALUES;
2967 
2968   a->size         = oldmat->size;
2969   a->rank         = oldmat->rank;
2970   a->donotstash   = oldmat->donotstash;
2971   a->roworiented  = oldmat->roworiented;
2972   a->rowindices   = NULL;
2973   a->rowvalues    = NULL;
2974   a->getrowactive = PETSC_FALSE;
2975 
2976   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2977   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2978   if (matin->hash_active) {
2979     PetscCall(MatSetUp(mat));
2980   } else {
2981     mat->preallocated = matin->preallocated;
2982     if (oldmat->colmap) {
2983 #if defined(PETSC_USE_CTABLE)
2984       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2985 #else
2986       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
2987       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
2988 #endif
2989     } else a->colmap = NULL;
2990     if (oldmat->garray) {
2991       PetscInt len;
2992       len = oldmat->B->cmap->n;
2993       PetscCall(PetscMalloc1(len + 1, &a->garray));
2994       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2995     } else a->garray = NULL;
2996 
2997     /* It may happen MatDuplicate is called with a non-assembled matrix
2998       In fact, MatDuplicate only requires the matrix to be preallocated
2999       This may happen inside a DMCreateMatrix_Shell */
3000     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3001     if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3002     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3003     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3004   }
3005   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3006   *newmat = mat;
3007   PetscFunctionReturn(PETSC_SUCCESS);
3008 }
3009 
3010 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3011 {
3012   PetscBool isbinary, ishdf5;
3013 
3014   PetscFunctionBegin;
3015   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3016   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3017   /* force binary viewer to load .info file if it has not yet done so */
3018   PetscCall(PetscViewerSetUp(viewer));
3019   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3020   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3021   if (isbinary) {
3022     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3023   } else if (ishdf5) {
3024 #if defined(PETSC_HAVE_HDF5)
3025     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3026 #else
3027     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3028 #endif
3029   } else {
3030     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3031   }
3032   PetscFunctionReturn(PETSC_SUCCESS);
3033 }
3034 
3035 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3036 {
3037   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3038   PetscInt    *rowidxs, *colidxs;
3039   PetscScalar *matvals;
3040 
3041   PetscFunctionBegin;
3042   PetscCall(PetscViewerSetUp(viewer));
3043 
3044   /* read in matrix header */
3045   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3046   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3047   M  = header[1];
3048   N  = header[2];
3049   nz = header[3];
3050   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3051   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3052   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3053 
3054   /* set block sizes from the viewer's .info file */
3055   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3056   /* set global sizes if not set already */
3057   if (mat->rmap->N < 0) mat->rmap->N = M;
3058   if (mat->cmap->N < 0) mat->cmap->N = N;
3059   PetscCall(PetscLayoutSetUp(mat->rmap));
3060   PetscCall(PetscLayoutSetUp(mat->cmap));
3061 
3062   /* check if the matrix sizes are correct */
3063   PetscCall(MatGetSize(mat, &rows, &cols));
3064   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3065 
3066   /* read in row lengths and build row indices */
3067   PetscCall(MatGetLocalSize(mat, &m, NULL));
3068   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3069   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3070   rowidxs[0] = 0;
3071   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3072   if (nz != PETSC_MAX_INT) {
3073     PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3074     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3075   }
3076 
3077   /* read in column indices and matrix values */
3078   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3079   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3080   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3081   /* store matrix indices and values */
3082   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3083   PetscCall(PetscFree(rowidxs));
3084   PetscCall(PetscFree2(colidxs, matvals));
3085   PetscFunctionReturn(PETSC_SUCCESS);
3086 }
3087 
3088 /* Not scalable because of ISAllGather() unless getting all columns. */
3089 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3090 {
3091   IS          iscol_local;
3092   PetscBool   isstride;
3093   PetscMPIInt lisstride = 0, gisstride;
3094 
3095   PetscFunctionBegin;
3096   /* check if we are grabbing all columns*/
3097   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3098 
3099   if (isstride) {
3100     PetscInt start, len, mstart, mlen;
3101     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3102     PetscCall(ISGetLocalSize(iscol, &len));
3103     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3104     if (mstart == start && mlen - mstart == len) lisstride = 1;
3105   }
3106 
3107   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3108   if (gisstride) {
3109     PetscInt N;
3110     PetscCall(MatGetSize(mat, NULL, &N));
3111     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3112     PetscCall(ISSetIdentity(iscol_local));
3113     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3114   } else {
3115     PetscInt cbs;
3116     PetscCall(ISGetBlockSize(iscol, &cbs));
3117     PetscCall(ISAllGather(iscol, &iscol_local));
3118     PetscCall(ISSetBlockSize(iscol_local, cbs));
3119   }
3120 
3121   *isseq = iscol_local;
3122   PetscFunctionReturn(PETSC_SUCCESS);
3123 }
3124 
3125 /*
3126  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3127  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3128 
3129  Input Parameters:
3130 +   mat - matrix
3131 .   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3132            i.e., mat->rstart <= isrow[i] < mat->rend
3133 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3134            i.e., mat->cstart <= iscol[i] < mat->cend
3135 
3136  Output Parameters:
3137 +   isrow_d - sequential row index set for retrieving mat->A
3138 .   iscol_d - sequential  column index set for retrieving mat->A
3139 .   iscol_o - sequential column index set for retrieving mat->B
3140 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3141  */
3142 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3143 {
3144   Vec             x, cmap;
3145   const PetscInt *is_idx;
3146   PetscScalar    *xarray, *cmaparray;
3147   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3148   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3149   Mat             B    = a->B;
3150   Vec             lvec = a->lvec, lcmap;
3151   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3152   MPI_Comm        comm;
3153   VecScatter      Mvctx = a->Mvctx;
3154 
3155   PetscFunctionBegin;
3156   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3157   PetscCall(ISGetLocalSize(iscol, &ncols));
3158 
3159   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3160   PetscCall(MatCreateVecs(mat, &x, NULL));
3161   PetscCall(VecSet(x, -1.0));
3162   PetscCall(VecDuplicate(x, &cmap));
3163   PetscCall(VecSet(cmap, -1.0));
3164 
3165   /* Get start indices */
3166   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3167   isstart -= ncols;
3168   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3169 
3170   PetscCall(ISGetIndices(iscol, &is_idx));
3171   PetscCall(VecGetArray(x, &xarray));
3172   PetscCall(VecGetArray(cmap, &cmaparray));
3173   PetscCall(PetscMalloc1(ncols, &idx));
3174   for (i = 0; i < ncols; i++) {
3175     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3176     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3177     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3178   }
3179   PetscCall(VecRestoreArray(x, &xarray));
3180   PetscCall(VecRestoreArray(cmap, &cmaparray));
3181   PetscCall(ISRestoreIndices(iscol, &is_idx));
3182 
3183   /* Get iscol_d */
3184   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3185   PetscCall(ISGetBlockSize(iscol, &i));
3186   PetscCall(ISSetBlockSize(*iscol_d, i));
3187 
3188   /* Get isrow_d */
3189   PetscCall(ISGetLocalSize(isrow, &m));
3190   rstart = mat->rmap->rstart;
3191   PetscCall(PetscMalloc1(m, &idx));
3192   PetscCall(ISGetIndices(isrow, &is_idx));
3193   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3194   PetscCall(ISRestoreIndices(isrow, &is_idx));
3195 
3196   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3197   PetscCall(ISGetBlockSize(isrow, &i));
3198   PetscCall(ISSetBlockSize(*isrow_d, i));
3199 
3200   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3201   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3202   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3203 
3204   PetscCall(VecDuplicate(lvec, &lcmap));
3205 
3206   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3207   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3208 
3209   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3210   /* off-process column indices */
3211   count = 0;
3212   PetscCall(PetscMalloc1(Bn, &idx));
3213   PetscCall(PetscMalloc1(Bn, &cmap1));
3214 
3215   PetscCall(VecGetArray(lvec, &xarray));
3216   PetscCall(VecGetArray(lcmap, &cmaparray));
3217   for (i = 0; i < Bn; i++) {
3218     if (PetscRealPart(xarray[i]) > -1.0) {
3219       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3220       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3221       count++;
3222     }
3223   }
3224   PetscCall(VecRestoreArray(lvec, &xarray));
3225   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3226 
3227   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3228   /* cannot ensure iscol_o has same blocksize as iscol! */
3229 
3230   PetscCall(PetscFree(idx));
3231   *garray = cmap1;
3232 
3233   PetscCall(VecDestroy(&x));
3234   PetscCall(VecDestroy(&cmap));
3235   PetscCall(VecDestroy(&lcmap));
3236   PetscFunctionReturn(PETSC_SUCCESS);
3237 }
3238 
3239 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3240 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3241 {
3242   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3243   Mat         M = NULL;
3244   MPI_Comm    comm;
3245   IS          iscol_d, isrow_d, iscol_o;
3246   Mat         Asub = NULL, Bsub = NULL;
3247   PetscInt    n;
3248 
3249   PetscFunctionBegin;
3250   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3251 
3252   if (call == MAT_REUSE_MATRIX) {
3253     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3254     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3255     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3256 
3257     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3258     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3259 
3260     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3261     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3262 
3263     /* Update diagonal and off-diagonal portions of submat */
3264     asub = (Mat_MPIAIJ *)(*submat)->data;
3265     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3266     PetscCall(ISGetLocalSize(iscol_o, &n));
3267     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3268     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3269     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3270 
3271   } else { /* call == MAT_INITIAL_MATRIX) */
3272     const PetscInt *garray;
3273     PetscInt        BsubN;
3274 
3275     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3276     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3277 
3278     /* Create local submatrices Asub and Bsub */
3279     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3280     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3281 
3282     /* Create submatrix M */
3283     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3284 
3285     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3286     asub = (Mat_MPIAIJ *)M->data;
3287 
3288     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3289     n = asub->B->cmap->N;
3290     if (BsubN > n) {
3291       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3292       const PetscInt *idx;
3293       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3294       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3295 
3296       PetscCall(PetscMalloc1(n, &idx_new));
3297       j = 0;
3298       PetscCall(ISGetIndices(iscol_o, &idx));
3299       for (i = 0; i < n; i++) {
3300         if (j >= BsubN) break;
3301         while (subgarray[i] > garray[j]) j++;
3302 
3303         if (subgarray[i] == garray[j]) {
3304           idx_new[i] = idx[j++];
3305         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3306       }
3307       PetscCall(ISRestoreIndices(iscol_o, &idx));
3308 
3309       PetscCall(ISDestroy(&iscol_o));
3310       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3311 
3312     } else if (BsubN < n) {
3313       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3314     }
3315 
3316     PetscCall(PetscFree(garray));
3317     *submat = M;
3318 
3319     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3320     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3321     PetscCall(ISDestroy(&isrow_d));
3322 
3323     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3324     PetscCall(ISDestroy(&iscol_d));
3325 
3326     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3327     PetscCall(ISDestroy(&iscol_o));
3328   }
3329   PetscFunctionReturn(PETSC_SUCCESS);
3330 }
3331 
3332 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3333 {
3334   IS        iscol_local = NULL, isrow_d;
3335   PetscInt  csize;
3336   PetscInt  n, i, j, start, end;
3337   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3338   MPI_Comm  comm;
3339 
3340   PetscFunctionBegin;
3341   /* If isrow has same processor distribution as mat,
3342      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3343   if (call == MAT_REUSE_MATRIX) {
3344     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3345     if (isrow_d) {
3346       sameRowDist  = PETSC_TRUE;
3347       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3348     } else {
3349       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3350       if (iscol_local) {
3351         sameRowDist  = PETSC_TRUE;
3352         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3353       }
3354     }
3355   } else {
3356     /* Check if isrow has same processor distribution as mat */
3357     sameDist[0] = PETSC_FALSE;
3358     PetscCall(ISGetLocalSize(isrow, &n));
3359     if (!n) {
3360       sameDist[0] = PETSC_TRUE;
3361     } else {
3362       PetscCall(ISGetMinMax(isrow, &i, &j));
3363       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3364       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3365     }
3366 
3367     /* Check if iscol has same processor distribution as mat */
3368     sameDist[1] = PETSC_FALSE;
3369     PetscCall(ISGetLocalSize(iscol, &n));
3370     if (!n) {
3371       sameDist[1] = PETSC_TRUE;
3372     } else {
3373       PetscCall(ISGetMinMax(iscol, &i, &j));
3374       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3375       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3376     }
3377 
3378     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3379     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3380     sameRowDist = tsameDist[0];
3381   }
3382 
3383   if (sameRowDist) {
3384     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3385       /* isrow and iscol have same processor distribution as mat */
3386       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3387       PetscFunctionReturn(PETSC_SUCCESS);
3388     } else { /* sameRowDist */
3389       /* isrow has same processor distribution as mat */
3390       if (call == MAT_INITIAL_MATRIX) {
3391         PetscBool sorted;
3392         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3393         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3394         PetscCall(ISGetSize(iscol, &i));
3395         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3396 
3397         PetscCall(ISSorted(iscol_local, &sorted));
3398         if (sorted) {
3399           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3400           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3401           PetscFunctionReturn(PETSC_SUCCESS);
3402         }
3403       } else { /* call == MAT_REUSE_MATRIX */
3404         IS iscol_sub;
3405         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3406         if (iscol_sub) {
3407           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3408           PetscFunctionReturn(PETSC_SUCCESS);
3409         }
3410       }
3411     }
3412   }
3413 
3414   /* General case: iscol -> iscol_local which has global size of iscol */
3415   if (call == MAT_REUSE_MATRIX) {
3416     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3417     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3418   } else {
3419     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3420   }
3421 
3422   PetscCall(ISGetLocalSize(iscol, &csize));
3423   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3424 
3425   if (call == MAT_INITIAL_MATRIX) {
3426     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3427     PetscCall(ISDestroy(&iscol_local));
3428   }
3429   PetscFunctionReturn(PETSC_SUCCESS);
3430 }
3431 
3432 /*@C
3433   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3434   and "off-diagonal" part of the matrix in CSR format.
3435 
3436   Collective
3437 
3438   Input Parameters:
3439 + comm   - MPI communicator
3440 . A      - "diagonal" portion of matrix
3441 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3442 - garray - global index of `B` columns
3443 
3444   Output Parameter:
3445 . mat - the matrix, with input `A` as its local diagonal matrix
3446 
3447   Level: advanced
3448 
3449   Notes:
3450   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3451 
3452   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3453 
3454 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3455 @*/
3456 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3457 {
3458   Mat_MPIAIJ        *maij;
3459   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3460   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3461   const PetscScalar *oa;
3462   Mat                Bnew;
3463   PetscInt           m, n, N;
3464   MatType            mpi_mat_type;
3465 
3466   PetscFunctionBegin;
3467   PetscCall(MatCreate(comm, mat));
3468   PetscCall(MatGetSize(A, &m, &n));
3469   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3470   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3471   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3472   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3473 
3474   /* Get global columns of mat */
3475   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3476 
3477   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3478   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3479   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3480   PetscCall(MatSetType(*mat, mpi_mat_type));
3481 
3482   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3483   maij = (Mat_MPIAIJ *)(*mat)->data;
3484 
3485   (*mat)->preallocated = PETSC_TRUE;
3486 
3487   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3488   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3489 
3490   /* Set A as diagonal portion of *mat */
3491   maij->A = A;
3492 
3493   nz = oi[m];
3494   for (i = 0; i < nz; i++) {
3495     col   = oj[i];
3496     oj[i] = garray[col];
3497   }
3498 
3499   /* Set Bnew as off-diagonal portion of *mat */
3500   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3501   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3502   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3503   bnew        = (Mat_SeqAIJ *)Bnew->data;
3504   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3505   maij->B     = Bnew;
3506 
3507   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3508 
3509   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3510   b->free_a       = PETSC_FALSE;
3511   b->free_ij      = PETSC_FALSE;
3512   PetscCall(MatDestroy(&B));
3513 
3514   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3515   bnew->free_a       = PETSC_TRUE;
3516   bnew->free_ij      = PETSC_TRUE;
3517 
3518   /* condense columns of maij->B */
3519   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3520   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3521   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3522   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3523   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3524   PetscFunctionReturn(PETSC_SUCCESS);
3525 }
3526 
3527 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3528 
3529 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3530 {
3531   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3532   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3533   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3534   Mat             M, Msub, B = a->B;
3535   MatScalar      *aa;
3536   Mat_SeqAIJ     *aij;
3537   PetscInt       *garray = a->garray, *colsub, Ncols;
3538   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3539   IS              iscol_sub, iscmap;
3540   const PetscInt *is_idx, *cmap;
3541   PetscBool       allcolumns = PETSC_FALSE;
3542   MPI_Comm        comm;
3543 
3544   PetscFunctionBegin;
3545   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3546   if (call == MAT_REUSE_MATRIX) {
3547     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3548     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3549     PetscCall(ISGetLocalSize(iscol_sub, &count));
3550 
3551     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3552     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3553 
3554     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3555     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3556 
3557     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3558 
3559   } else { /* call == MAT_INITIAL_MATRIX) */
3560     PetscBool flg;
3561 
3562     PetscCall(ISGetLocalSize(iscol, &n));
3563     PetscCall(ISGetSize(iscol, &Ncols));
3564 
3565     /* (1) iscol -> nonscalable iscol_local */
3566     /* Check for special case: each processor gets entire matrix columns */
3567     PetscCall(ISIdentity(iscol_local, &flg));
3568     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3569     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3570     if (allcolumns) {
3571       iscol_sub = iscol_local;
3572       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3573       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3574 
3575     } else {
3576       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3577       PetscInt *idx, *cmap1, k;
3578       PetscCall(PetscMalloc1(Ncols, &idx));
3579       PetscCall(PetscMalloc1(Ncols, &cmap1));
3580       PetscCall(ISGetIndices(iscol_local, &is_idx));
3581       count = 0;
3582       k     = 0;
3583       for (i = 0; i < Ncols; i++) {
3584         j = is_idx[i];
3585         if (j >= cstart && j < cend) {
3586           /* diagonal part of mat */
3587           idx[count]     = j;
3588           cmap1[count++] = i; /* column index in submat */
3589         } else if (Bn) {
3590           /* off-diagonal part of mat */
3591           if (j == garray[k]) {
3592             idx[count]     = j;
3593             cmap1[count++] = i; /* column index in submat */
3594           } else if (j > garray[k]) {
3595             while (j > garray[k] && k < Bn - 1) k++;
3596             if (j == garray[k]) {
3597               idx[count]     = j;
3598               cmap1[count++] = i; /* column index in submat */
3599             }
3600           }
3601         }
3602       }
3603       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3604 
3605       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3606       PetscCall(ISGetBlockSize(iscol, &cbs));
3607       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3608 
3609       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3610     }
3611 
3612     /* (3) Create sequential Msub */
3613     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3614   }
3615 
3616   PetscCall(ISGetLocalSize(iscol_sub, &count));
3617   aij = (Mat_SeqAIJ *)(Msub)->data;
3618   ii  = aij->i;
3619   PetscCall(ISGetIndices(iscmap, &cmap));
3620 
3621   /*
3622       m - number of local rows
3623       Ncols - number of columns (same on all processors)
3624       rstart - first row in new global matrix generated
3625   */
3626   PetscCall(MatGetSize(Msub, &m, NULL));
3627 
3628   if (call == MAT_INITIAL_MATRIX) {
3629     /* (4) Create parallel newmat */
3630     PetscMPIInt rank, size;
3631     PetscInt    csize;
3632 
3633     PetscCallMPI(MPI_Comm_size(comm, &size));
3634     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3635 
3636     /*
3637         Determine the number of non-zeros in the diagonal and off-diagonal
3638         portions of the matrix in order to do correct preallocation
3639     */
3640 
3641     /* first get start and end of "diagonal" columns */
3642     PetscCall(ISGetLocalSize(iscol, &csize));
3643     if (csize == PETSC_DECIDE) {
3644       PetscCall(ISGetSize(isrow, &mglobal));
3645       if (mglobal == Ncols) { /* square matrix */
3646         nlocal = m;
3647       } else {
3648         nlocal = Ncols / size + ((Ncols % size) > rank);
3649       }
3650     } else {
3651       nlocal = csize;
3652     }
3653     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3654     rstart = rend - nlocal;
3655     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3656 
3657     /* next, compute all the lengths */
3658     jj = aij->j;
3659     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3660     olens = dlens + m;
3661     for (i = 0; i < m; i++) {
3662       jend = ii[i + 1] - ii[i];
3663       olen = 0;
3664       dlen = 0;
3665       for (j = 0; j < jend; j++) {
3666         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3667         else dlen++;
3668         jj++;
3669       }
3670       olens[i] = olen;
3671       dlens[i] = dlen;
3672     }
3673 
3674     PetscCall(ISGetBlockSize(isrow, &bs));
3675     PetscCall(ISGetBlockSize(iscol, &cbs));
3676 
3677     PetscCall(MatCreate(comm, &M));
3678     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3679     PetscCall(MatSetBlockSizes(M, bs, cbs));
3680     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3681     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3682     PetscCall(PetscFree(dlens));
3683 
3684   } else { /* call == MAT_REUSE_MATRIX */
3685     M = *newmat;
3686     PetscCall(MatGetLocalSize(M, &i, NULL));
3687     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3688     PetscCall(MatZeroEntries(M));
3689     /*
3690          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3691        rather than the slower MatSetValues().
3692     */
3693     M->was_assembled = PETSC_TRUE;
3694     M->assembled     = PETSC_FALSE;
3695   }
3696 
3697   /* (5) Set values of Msub to *newmat */
3698   PetscCall(PetscMalloc1(count, &colsub));
3699   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3700 
3701   jj = aij->j;
3702   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3703   for (i = 0; i < m; i++) {
3704     row = rstart + i;
3705     nz  = ii[i + 1] - ii[i];
3706     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3707     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3708     jj += nz;
3709     aa += nz;
3710   }
3711   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3712   PetscCall(ISRestoreIndices(iscmap, &cmap));
3713 
3714   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3715   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3716 
3717   PetscCall(PetscFree(colsub));
3718 
3719   /* save Msub, iscol_sub and iscmap used in processor for next request */
3720   if (call == MAT_INITIAL_MATRIX) {
3721     *newmat = M;
3722     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3723     PetscCall(MatDestroy(&Msub));
3724 
3725     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3726     PetscCall(ISDestroy(&iscol_sub));
3727 
3728     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3729     PetscCall(ISDestroy(&iscmap));
3730 
3731     if (iscol_local) {
3732       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3733       PetscCall(ISDestroy(&iscol_local));
3734     }
3735   }
3736   PetscFunctionReturn(PETSC_SUCCESS);
3737 }
3738 
3739 /*
3740     Not great since it makes two copies of the submatrix: first a SeqAIJ
3741   on each process, and then the end result obtained by concatenating those local matrices.
3742   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3743 
3744   This requires a sequential iscol with all indices.
3745 */
3746 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3747 {
3748   PetscMPIInt rank, size;
3749   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3750   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3751   Mat         M, Mreuse;
3752   MatScalar  *aa, *vwork;
3753   MPI_Comm    comm;
3754   Mat_SeqAIJ *aij;
3755   PetscBool   colflag, allcolumns = PETSC_FALSE;
3756 
3757   PetscFunctionBegin;
3758   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3759   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3760   PetscCallMPI(MPI_Comm_size(comm, &size));
3761 
3762   /* Check for special case: each processor gets entire matrix columns */
3763   PetscCall(ISIdentity(iscol, &colflag));
3764   PetscCall(ISGetLocalSize(iscol, &n));
3765   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3766   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3767 
3768   if (call == MAT_REUSE_MATRIX) {
3769     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3770     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3771     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3772   } else {
3773     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3774   }
3775 
3776   /*
3777       m - number of local rows
3778       n - number of columns (same on all processors)
3779       rstart - first row in new global matrix generated
3780   */
3781   PetscCall(MatGetSize(Mreuse, &m, &n));
3782   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3783   if (call == MAT_INITIAL_MATRIX) {
3784     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3785     ii  = aij->i;
3786     jj  = aij->j;
3787 
3788     /*
3789         Determine the number of non-zeros in the diagonal and off-diagonal
3790         portions of the matrix in order to do correct preallocation
3791     */
3792 
3793     /* first get start and end of "diagonal" columns */
3794     if (csize == PETSC_DECIDE) {
3795       PetscCall(ISGetSize(isrow, &mglobal));
3796       if (mglobal == n) { /* square matrix */
3797         nlocal = m;
3798       } else {
3799         nlocal = n / size + ((n % size) > rank);
3800       }
3801     } else {
3802       nlocal = csize;
3803     }
3804     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3805     rstart = rend - nlocal;
3806     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3807 
3808     /* next, compute all the lengths */
3809     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3810     olens = dlens + m;
3811     for (i = 0; i < m; i++) {
3812       jend = ii[i + 1] - ii[i];
3813       olen = 0;
3814       dlen = 0;
3815       for (j = 0; j < jend; j++) {
3816         if (*jj < rstart || *jj >= rend) olen++;
3817         else dlen++;
3818         jj++;
3819       }
3820       olens[i] = olen;
3821       dlens[i] = dlen;
3822     }
3823     PetscCall(MatCreate(comm, &M));
3824     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3825     PetscCall(MatSetBlockSizes(M, bs, cbs));
3826     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3827     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3828     PetscCall(PetscFree(dlens));
3829   } else {
3830     PetscInt ml, nl;
3831 
3832     M = *newmat;
3833     PetscCall(MatGetLocalSize(M, &ml, &nl));
3834     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3835     PetscCall(MatZeroEntries(M));
3836     /*
3837          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3838        rather than the slower MatSetValues().
3839     */
3840     M->was_assembled = PETSC_TRUE;
3841     M->assembled     = PETSC_FALSE;
3842   }
3843   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3844   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3845   ii  = aij->i;
3846   jj  = aij->j;
3847 
3848   /* trigger copy to CPU if needed */
3849   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3850   for (i = 0; i < m; i++) {
3851     row   = rstart + i;
3852     nz    = ii[i + 1] - ii[i];
3853     cwork = jj;
3854     jj    = PetscSafePointerPlusOffset(jj, nz);
3855     vwork = aa;
3856     aa    = PetscSafePointerPlusOffset(aa, nz);
3857     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3858   }
3859   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3860 
3861   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3862   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3863   *newmat = M;
3864 
3865   /* save submatrix used in processor for next request */
3866   if (call == MAT_INITIAL_MATRIX) {
3867     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3868     PetscCall(MatDestroy(&Mreuse));
3869   }
3870   PetscFunctionReturn(PETSC_SUCCESS);
3871 }
3872 
3873 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3874 {
3875   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3876   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3877   const PetscInt *JJ;
3878   PetscBool       nooffprocentries;
3879   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3880 
3881   PetscFunctionBegin;
3882   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3883 
3884   PetscCall(PetscLayoutSetUp(B->rmap));
3885   PetscCall(PetscLayoutSetUp(B->cmap));
3886   m      = B->rmap->n;
3887   cstart = B->cmap->rstart;
3888   cend   = B->cmap->rend;
3889   rstart = B->rmap->rstart;
3890 
3891   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3892 
3893   if (PetscDefined(USE_DEBUG)) {
3894     for (i = 0; i < m; i++) {
3895       nnz = Ii[i + 1] - Ii[i];
3896       JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
3897       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3898       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3899       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3900     }
3901   }
3902 
3903   for (i = 0; i < m; i++) {
3904     nnz     = Ii[i + 1] - Ii[i];
3905     JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
3906     nnz_max = PetscMax(nnz_max, nnz);
3907     d       = 0;
3908     for (j = 0; j < nnz; j++) {
3909       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3910     }
3911     d_nnz[i] = d;
3912     o_nnz[i] = nnz - d;
3913   }
3914   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3915   PetscCall(PetscFree2(d_nnz, o_nnz));
3916 
3917   for (i = 0; i < m; i++) {
3918     ii = i + rstart;
3919     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
3920   }
3921   nooffprocentries    = B->nooffprocentries;
3922   B->nooffprocentries = PETSC_TRUE;
3923   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3924   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3925   B->nooffprocentries = nooffprocentries;
3926 
3927   /* count number of entries below block diagonal */
3928   PetscCall(PetscFree(Aij->ld));
3929   PetscCall(PetscCalloc1(m, &ld));
3930   Aij->ld = ld;
3931   for (i = 0; i < m; i++) {
3932     nnz = Ii[i + 1] - Ii[i];
3933     j   = 0;
3934     while (j < nnz && J[j] < cstart) j++;
3935     ld[i] = j;
3936     if (J) J += nnz;
3937   }
3938 
3939   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3940   PetscFunctionReturn(PETSC_SUCCESS);
3941 }
3942 
3943 /*@
3944   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3945   (the default parallel PETSc format).
3946 
3947   Collective
3948 
3949   Input Parameters:
3950 + B - the matrix
3951 . i - the indices into j for the start of each local row (starts with zero)
3952 . j - the column indices for each local row (starts with zero)
3953 - v - optional values in the matrix
3954 
3955   Level: developer
3956 
3957   Notes:
3958   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3959   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3960   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3961 
3962   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3963 
3964   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3965 
3966   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3967 
3968   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3969   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3970 
3971   The format which is used for the sparse matrix input, is equivalent to a
3972   row-major ordering, i.e., for the following matrix, the input data expected is
3973   as shown
3974 .vb
3975         1 0 0
3976         2 0 3     P0
3977        -------
3978         4 5 6     P1
3979 
3980      Process0 [P0] rows_owned=[0,1]
3981         i =  {0,1,3}  [size = nrow+1  = 2+1]
3982         j =  {0,0,2}  [size = 3]
3983         v =  {1,2,3}  [size = 3]
3984 
3985      Process1 [P1] rows_owned=[2]
3986         i =  {0,3}    [size = nrow+1  = 1+1]
3987         j =  {0,1,2}  [size = 3]
3988         v =  {4,5,6}  [size = 3]
3989 .ve
3990 
3991 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
3992           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
3993 @*/
3994 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3995 {
3996   PetscFunctionBegin;
3997   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
3998   PetscFunctionReturn(PETSC_SUCCESS);
3999 }
4000 
4001 /*@C
4002   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4003   (the default parallel PETSc format).  For good matrix assembly performance
4004   the user should preallocate the matrix storage by setting the parameters
4005   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4006 
4007   Collective
4008 
4009   Input Parameters:
4010 + B     - the matrix
4011 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4012            (same value is used for all local rows)
4013 . d_nnz - array containing the number of nonzeros in the various rows of the
4014            DIAGONAL portion of the local submatrix (possibly different for each row)
4015            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4016            The size of this array is equal to the number of local rows, i.e 'm'.
4017            For matrices that will be factored, you must leave room for (and set)
4018            the diagonal entry even if it is zero.
4019 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4020            submatrix (same value is used for all local rows).
4021 - o_nnz - array containing the number of nonzeros in the various rows of the
4022            OFF-DIAGONAL portion of the local submatrix (possibly different for
4023            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4024            structure. The size of this array is equal to the number
4025            of local rows, i.e 'm'.
4026 
4027   Example Usage:
4028   Consider the following 8x8 matrix with 34 non-zero values, that is
4029   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4030   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4031   as follows
4032 
4033 .vb
4034             1  2  0  |  0  3  0  |  0  4
4035     Proc0   0  5  6  |  7  0  0  |  8  0
4036             9  0 10  | 11  0  0  | 12  0
4037     -------------------------------------
4038            13  0 14  | 15 16 17  |  0  0
4039     Proc1   0 18  0  | 19 20 21  |  0  0
4040             0  0  0  | 22 23  0  | 24  0
4041     -------------------------------------
4042     Proc2  25 26 27  |  0  0 28  | 29  0
4043            30  0  0  | 31 32 33  |  0 34
4044 .ve
4045 
4046   This can be represented as a collection of submatrices as
4047 .vb
4048       A B C
4049       D E F
4050       G H I
4051 .ve
4052 
4053   Where the submatrices A,B,C are owned by proc0, D,E,F are
4054   owned by proc1, G,H,I are owned by proc2.
4055 
4056   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4057   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4058   The 'M','N' parameters are 8,8, and have the same values on all procs.
4059 
4060   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4061   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4062   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4063   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4064   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4065   matrix, and [DF] as another `MATSEQAIJ` matrix.
4066 
4067   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4068   allocated for every row of the local diagonal submatrix, and `o_nz`
4069   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4070   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4071   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4072   In this case, the values of `d_nz`, `o_nz` are
4073 .vb
4074      proc0  dnz = 2, o_nz = 2
4075      proc1  dnz = 3, o_nz = 2
4076      proc2  dnz = 1, o_nz = 4
4077 .ve
4078   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4079   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4080   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4081   34 values.
4082 
4083   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4084   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4085   In the above case the values for `d_nnz`, `o_nnz` are
4086 .vb
4087      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4088      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4089      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4090 .ve
4091   Here the space allocated is sum of all the above values i.e 34, and
4092   hence pre-allocation is perfect.
4093 
4094   Level: intermediate
4095 
4096   Notes:
4097   If the *_nnz parameter is given then the *_nz parameter is ignored
4098 
4099   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4100   storage.  The stored row and column indices begin with zero.
4101   See [Sparse Matrices](sec_matsparse) for details.
4102 
4103   The parallel matrix is partitioned such that the first m0 rows belong to
4104   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4105   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4106 
4107   The DIAGONAL portion of the local submatrix of a processor can be defined
4108   as the submatrix which is obtained by extracting the part corresponding to
4109   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4110   first row that belongs to the processor, r2 is the last row belonging to
4111   this processor, and c1-c2 is the range of indices of the local part of a
4112   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4113   common case of a square matrix, the row and column ranges are the same and
4114   the DIAGONAL part is also square. The remaining portion of the local
4115   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4116 
4117   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4118 
4119   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4120   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4121   You can also run with the option `-info` and look for messages with the string
4122   malloc in them to see if additional memory allocation was needed.
4123 
4124 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4125           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4126 @*/
4127 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4128 {
4129   PetscFunctionBegin;
       /* Argument checks on B (argument 1): it must be a Mat object and, per PetscValidType(), have its type set */
4130   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4131   PetscValidType(B, 1);
       /* Forward all arguments unchanged to the implementation composed on B under "MatMPIAIJSetPreallocation_C" */
4132   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4133   PetscFunctionReturn(PETSC_SUCCESS);
4134 }
4135 
4136 /*@
4137   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows
4138   in standard CSR format.
4139 
4140   Collective
4141 
4142   Input Parameters:
4143 + comm - MPI communicator
4144 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4145 . n    - This value should be the same as the local size used in creating the
4146        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4147        calculated if N is given) For square matrices n is almost always m.
4148 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4149 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4150 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4151 . j    - column indices
4152 - a    - optional matrix values
4153 
4154   Output Parameter:
4155 . mat - the matrix
4156 
4157   Level: intermediate
4158 
4159   Notes:
4160   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4161   thus you CANNOT change the matrix entries by changing the values of a[] after you have
4162   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4163 
4164   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4165 
4166   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`
4167 
4168   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
4169   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4170 
4171   The format which is used for the sparse matrix input, is equivalent to a
4172   row-major ordering, i.e., for the following matrix, the input data expected is
4173   as shown
4174 .vb
4175         1 0 0
4176         2 0 3     P0
4177        -------
4178         4 5 6     P1
4179 
4180      Process0 [P0] rows_owned=[0,1]
4181         i =  {0,1,3}  [size = nrow+1  = 2+1]
4182         j =  {0,0,2}  [size = 3]
4183         v =  {1,2,3}  [size = 3]
4184 
4185      Process1 [P1] rows_owned=[2]
4186         i =  {0,3}    [size = nrow+1  = 1+1]
4187         j =  {0,1,2}  [size = 3]
4188         v =  {4,5,6}  [size = 3]
4189 .ve
4190 
4191 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4192           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4193 @*/
4194 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4195 {
4196   PetscFunctionBegin;
4197   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4198   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4199   PetscCall(MatCreate(comm, mat));
4200   PetscCall(MatSetSizes(*mat, m, n, M, N));
4201   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4202   PetscCall(MatSetType(*mat, MATMPIAIJ));
4203   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4204   PetscFunctionReturn(PETSC_SUCCESS);
4205 }
4206 
4207 /*@
4208   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4209   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4210   to `MatCreateMPIAIJWithArrays()`
4211 
4212   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4213 
4214   Collective
4215 
4216   Input Parameters:
4217 + mat - the matrix
4218 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4219 . n   - This value should be the same as the local size used in creating the
4220        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4221        calculated if N is given) For square matrices n is almost always m.
4222 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4223 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4224 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4225 . J   - column indices
4226 - v   - matrix values
4227 
4228   Level: deprecated
4229 
4230 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4231           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4232 @*/
4233 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4234 {
4235   PetscInt        nnz, i;
4236   PetscBool       nooffprocentries;
4237   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4238   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4239   PetscScalar    *ad, *ao;
4240   PetscInt        ldi, Iii, md;
4241   const PetscInt *Adi = Ad->i;
4242   PetscInt       *ld  = Aij->ld;
4243 
4244   PetscFunctionBegin;
4245   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4246   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4247   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4248   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4249 
4250   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4251   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4252 
4253   for (i = 0; i < m; i++) {
4254     if (PetscDefined(USE_DEBUG)) {
4255       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4256         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4257         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4258       }
4259     }
4260     nnz = Ii[i + 1] - Ii[i];
4261     Iii = Ii[i];
4262     ldi = ld[i];
4263     md  = Adi[i + 1] - Adi[i];
4264     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4265     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4266     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4267     ad += md;
4268     ao += nnz - md;
4269   }
4270   nooffprocentries      = mat->nooffprocentries;
4271   mat->nooffprocentries = PETSC_TRUE;
4272   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4273   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4274   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4275   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4276   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4277   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4278   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4279   mat->nooffprocentries = nooffprocentries;
4280   PetscFunctionReturn(PETSC_SUCCESS);
4281 }
4282 
4283 /*@
4284   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4285 
4286   Collective
4287 
4288   Input Parameters:
4289 + mat - the matrix
4290 - v   - matrix values, stored by row
4291 
4292   Level: intermediate
4293 
4294   Notes:
4295   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4296 
4297   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4298 
4299 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4300           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4301 @*/
4302 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4303 {
4304   PetscInt        nnz, i, m;
4305   PetscBool       nooffprocentries;
4306   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4307   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4308   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4309   PetscScalar    *ad, *ao;
4310   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4311   PetscInt        ldi, Iii, md;
4312   PetscInt       *ld = Aij->ld;
4313 
4314   PetscFunctionBegin;
4315   m = mat->rmap->n;
4316 
4317   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4318   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4319   Iii = 0;
4320   for (i = 0; i < m; i++) {
4321     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4322     ldi = ld[i];
4323     md  = Adi[i + 1] - Adi[i];
4324     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4325     ad += md;
4326     if (ao) {
4327       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4328       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4329       ao += nnz - md;
4330     }
4331     Iii += nnz;
4332   }
4333   nooffprocentries      = mat->nooffprocentries;
4334   mat->nooffprocentries = PETSC_TRUE;
4335   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4336   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4337   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4338   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4339   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4340   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4341   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4342   mat->nooffprocentries = nooffprocentries;
4343   PetscFunctionReturn(PETSC_SUCCESS);
4344 }
4345 
4346 /*@C
4347   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4348   (the default parallel PETSc format).  For good matrix assembly performance
4349   the user should preallocate the matrix storage by setting the parameters
4350   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4351 
4352   Collective
4353 
4354   Input Parameters:
4355 + comm  - MPI communicator
4356 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4357            This value should be the same as the local size used in creating the
4358            y vector for the matrix-vector product y = Ax.
4359 . n     - This value should be the same as the local size used in creating the
4360        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4361        calculated if N is given) For square matrices n is almost always m.
4362 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4363 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4364 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4365            (same value is used for all local rows)
4366 . d_nnz - array containing the number of nonzeros in the various rows of the
4367            DIAGONAL portion of the local submatrix (possibly different for each row)
4368            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4369            The size of this array is equal to the number of local rows, i.e 'm'.
4370 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4371            submatrix (same value is used for all local rows).
4372 - o_nnz - array containing the number of nonzeros in the various rows of the
4373            OFF-DIAGONAL portion of the local submatrix (possibly different for
4374            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4375            structure. The size of this array is equal to the number
4376            of local rows, i.e 'm'.
4377 
4378   Output Parameter:
4379 . A - the matrix
4380 
4381   Options Database Keys:
4382 + -mat_no_inode                     - Do not use inodes
4383 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4384 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4385         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4386         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4387 
4388   Level: intermediate
4389 
4390   Notes:
4391   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4392   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4393   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4394 
4395   If the *_nnz parameter is given then the *_nz parameter is ignored
4396 
4397   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4398   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4399   storage requirements for this matrix.
4400 
4401   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4402   processor then it must be used on all processors that share the object for
4403   that argument.
4404 
4405   The user MUST specify either the local or global matrix dimensions
4406   (possibly both).
4407 
4408   The parallel matrix is partitioned across processors such that the
4409   first m0 rows belong to process 0, the next m1 rows belong to
4410   process 1, the next m2 rows belong to process 2 etc.. where
4411   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4412   values corresponding to [m x N] submatrix.
4413 
4414   The columns are logically partitioned with the n0 columns belonging
4415   to 0th partition, the next n1 columns belonging to the next
4416   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4417 
4418   The DIAGONAL portion of the local submatrix on any given processor
4419   is the submatrix corresponding to the rows and columns m,n
4420   corresponding to the given processor. i.e diagonal matrix on
4421   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4422   etc. The remaining portion of the local submatrix [m x (N-n)]
4423   constitute the OFF-DIAGONAL portion. The example below better
4424   illustrates this concept.
4425 
4426   For a square global matrix we define each processor's diagonal portion
4427   to be its local rows and the corresponding columns (a square submatrix);
4428   each processor's off-diagonal portion encompasses the remainder of the
4429   local matrix (a rectangular submatrix).
4430 
4431   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4432 
4433   When calling this routine with a single process communicator, a matrix of
4434   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4435   type of communicator, use the construction mechanism
4436 .vb
4437   MatCreate(..., &A);
4438   MatSetType(A, MATMPIAIJ);
4439   MatSetSizes(A, m, n, M, N);
4440   MatMPIAIJSetPreallocation(A, ...);
4441 .ve
4442 
4443   By default, this format uses inodes (identical nodes) when possible.
4444   We search for consecutive rows with the same nonzero structure, thereby
4445   reusing matrix information to achieve increased efficiency.
4446 
4447   Example Usage:
4448   Consider the following 8x8 matrix with 34 non-zero values, that is
4449   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4450   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4451   as follows
4452 
4453 .vb
4454             1  2  0  |  0  3  0  |  0  4
4455     Proc0   0  5  6  |  7  0  0  |  8  0
4456             9  0 10  | 11  0  0  | 12  0
4457     -------------------------------------
4458            13  0 14  | 15 16 17  |  0  0
4459     Proc1   0 18  0  | 19 20 21  |  0  0
4460             0  0  0  | 22 23  0  | 24  0
4461     -------------------------------------
4462     Proc2  25 26 27  |  0  0 28  | 29  0
4463            30  0  0  | 31 32 33  |  0 34
4464 .ve
4465 
4466   This can be represented as a collection of submatrices as
4467 
4468 .vb
4469       A B C
4470       D E F
4471       G H I
4472 .ve
4473 
4474   Where the submatrices A,B,C are owned by proc0, D,E,F are
4475   owned by proc1, G,H,I are owned by proc2.
4476 
4477   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4478   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4479   The 'M','N' parameters are 8,8, and have the same values on all procs.
4480 
4481   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4482   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4483   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4484   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4485   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4486   matrix, and [DF] as another `MATSEQAIJ` matrix.
4487 
4488   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4489   allocated for every row of the local diagonal submatrix, and `o_nz`
4490   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4491   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4492   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4493   In this case, the values of `d_nz`,`o_nz` are
4494 .vb
4495      proc0  d_nz = 2, o_nz = 2
4496      proc1  d_nz = 3, o_nz = 2
4497      proc2  d_nz = 1, o_nz = 4
4498 .ve
4499   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4500   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4501   for proc2. i.e we are using 12+15+10=37 storage locations to store
4502   34 values.
4503 
4504   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4505   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4506   In the above case the values for d_nnz,o_nnz are
4507 .vb
4508      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4509      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4510      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4511 .ve
4512   Here the space allocated is sum of all the above values i.e 34, and
4513   hence pre-allocation is perfect.
4514 
4515 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4516           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4517 @*/
4518 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4519 {
4520   PetscMPIInt size;
4521 
4522   PetscFunctionBegin;
4523   PetscCall(MatCreate(comm, A));
4524   PetscCall(MatSetSizes(*A, m, n, M, N));
4525   PetscCallMPI(MPI_Comm_size(comm, &size));
4526   if (size > 1) {
4527     PetscCall(MatSetType(*A, MATMPIAIJ));
4528     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4529   } else {
4530     PetscCall(MatSetType(*A, MATSEQAIJ));
4531     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4532   }
4533   PetscFunctionReturn(PETSC_SUCCESS);
4534 }
4535 
4536 /*MC
4537     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4538 
4539     Synopsis:
4540     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4541 
4542     Not Collective
4543 
4544     Input Parameter:
4545 .   A - the `MATMPIAIJ` matrix
4546 
4547     Output Parameters:
4548 +   Ad - the diagonal portion of the matrix
4549 .   Ao - the off-diagonal portion of the matrix
4550 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4551 -   ierr - error code
4552 
4553      Level: advanced
4554 
4555     Note:
4556     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4557 
4558 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4559 M*/
4560 
4561 /*MC
4562     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4563 
4564     Synopsis:
4565     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4566 
4567     Not Collective
4568 
4569     Input Parameters:
4570 +   A - the `MATMPIAIJ` matrix
4571 .   Ad - the diagonal portion of the matrix
4572 .   Ao - the off-diagonal portion of the matrix
4573 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4574 -   ierr - error code
4575 
4576      Level: advanced
4577 
4578 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4579 M*/
4580 
4581 /*@C
4582   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4583 
4584   Not Collective
4585 
4586   Input Parameter:
4587 . A - The `MATMPIAIJ` matrix
4588 
4589   Output Parameters:
4590 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4591 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4592 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4593 
4594   Level: intermediate
4595 
4596   Note:
4597   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4598   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
4599   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4600   local column numbers to global column numbers in the original matrix.
4601 
4602   Fortran Notes:
4603   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4604 
4605 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4606 @*/
4607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4608 {
4609   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4610   PetscBool   flg;
4611 
4612   PetscFunctionBegin;
4613   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4614   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4615   if (Ad) *Ad = a->A;
4616   if (Ao) *Ao = a->B;
4617   if (colmap) *colmap = a->garray;
4618   PetscFunctionReturn(PETSC_SUCCESS);
4619 }
4620 
4621 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4622 {
4623   PetscInt     m, N, i, rstart, nnz, Ii;
4624   PetscInt    *indx;
4625   PetscScalar *values;
4626   MatType      rootType;
4627 
4628   PetscFunctionBegin;
4629   PetscCall(MatGetSize(inmat, &m, &N));
4630   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4631     PetscInt *dnz, *onz, sum, bs, cbs;
4632 
4633     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4634     /* Check sum(n) = N */
4635     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4636     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4637 
4638     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4639     rstart -= m;
4640 
4641     MatPreallocateBegin(comm, m, n, dnz, onz);
4642     for (i = 0; i < m; i++) {
4643       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4644       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4645       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4646     }
4647 
4648     PetscCall(MatCreate(comm, outmat));
4649     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4650     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4651     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4652     PetscCall(MatGetRootType_Private(inmat, &rootType));
4653     PetscCall(MatSetType(*outmat, rootType));
4654     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4655     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4656     MatPreallocateEnd(dnz, onz);
4657     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4658   }
4659 
4660   /* numeric phase */
4661   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4662   for (i = 0; i < m; i++) {
4663     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4664     Ii = i + rstart;
4665     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4666     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4667   }
4668   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4669   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4670   PetscFunctionReturn(PETSC_SUCCESS);
4671 }
4672 
4673 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4674 {
4675   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4676 
4677   PetscFunctionBegin;
4678   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4679   PetscCall(PetscFree(merge->id_r));
4680   PetscCall(PetscFree(merge->len_s));
4681   PetscCall(PetscFree(merge->len_r));
4682   PetscCall(PetscFree(merge->bi));
4683   PetscCall(PetscFree(merge->bj));
4684   PetscCall(PetscFree(merge->buf_ri[0]));
4685   PetscCall(PetscFree(merge->buf_ri));
4686   PetscCall(PetscFree(merge->buf_rj[0]));
4687   PetscCall(PetscFree(merge->buf_rj));
4688   PetscCall(PetscFree(merge->coi));
4689   PetscCall(PetscFree(merge->coj));
4690   PetscCall(PetscFree(merge->owners_co));
4691   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4692   PetscCall(PetscFree(merge));
4693   PetscFunctionReturn(PETSC_SUCCESS);
4694 }
4695 
4696 #include <../src/mat/utils/freespace.h>
4697 #include <petscbt.h>
4698 
4699 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4700 {
4701   MPI_Comm             comm;
4702   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4703   PetscMPIInt          size, rank, taga, *len_s;
4704   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4705   PetscInt             proc, m;
4706   PetscInt           **buf_ri, **buf_rj;
4707   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4708   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4709   MPI_Request         *s_waits, *r_waits;
4710   MPI_Status          *status;
4711   const MatScalar     *aa, *a_a;
4712   MatScalar          **abuf_r, *ba_i;
4713   Mat_Merge_SeqsToMPI *merge;
4714   PetscContainer       container;
4715 
4716   PetscFunctionBegin;
4717   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4718   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4719 
4720   PetscCallMPI(MPI_Comm_size(comm, &size));
4721   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4722 
4723   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4724   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4725   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4726   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4727   aa = a_a;
4728 
4729   bi     = merge->bi;
4730   bj     = merge->bj;
4731   buf_ri = merge->buf_ri;
4732   buf_rj = merge->buf_rj;
4733 
4734   PetscCall(PetscMalloc1(size, &status));
4735   owners = merge->rowmap->range;
4736   len_s  = merge->len_s;
4737 
4738   /* send and recv matrix values */
4739   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4740   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4741 
4742   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4743   for (proc = 0, k = 0; proc < size; proc++) {
4744     if (!len_s[proc]) continue;
4745     i = owners[proc];
4746     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4747     k++;
4748   }
4749 
4750   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4751   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4752   PetscCall(PetscFree(status));
4753 
4754   PetscCall(PetscFree(s_waits));
4755   PetscCall(PetscFree(r_waits));
4756 
4757   /* insert mat values of mpimat */
4758   PetscCall(PetscMalloc1(N, &ba_i));
4759   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4760 
4761   for (k = 0; k < merge->nrecv; k++) {
4762     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4763     nrows       = *(buf_ri_k[k]);
4764     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4765     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4766   }
4767 
4768   /* set values of ba */
4769   m = merge->rowmap->n;
4770   for (i = 0; i < m; i++) {
4771     arow = owners[rank] + i;
4772     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4773     bnzi = bi[i + 1] - bi[i];
4774     PetscCall(PetscArrayzero(ba_i, bnzi));
4775 
4776     /* add local non-zero vals of this proc's seqmat into ba */
4777     anzi   = ai[arow + 1] - ai[arow];
4778     aj     = a->j + ai[arow];
4779     aa     = a_a + ai[arow];
4780     nextaj = 0;
4781     for (j = 0; nextaj < anzi; j++) {
4782       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4783         ba_i[j] += aa[nextaj++];
4784       }
4785     }
4786 
4787     /* add received vals into ba */
4788     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4789       /* i-th row */
4790       if (i == *nextrow[k]) {
4791         anzi   = *(nextai[k] + 1) - *nextai[k];
4792         aj     = buf_rj[k] + *(nextai[k]);
4793         aa     = abuf_r[k] + *(nextai[k]);
4794         nextaj = 0;
4795         for (j = 0; nextaj < anzi; j++) {
4796           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4797             ba_i[j] += aa[nextaj++];
4798           }
4799         }
4800         nextrow[k]++;
4801         nextai[k]++;
4802       }
4803     }
4804     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4805   }
4806   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4807   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4808   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4809 
4810   PetscCall(PetscFree(abuf_r[0]));
4811   PetscCall(PetscFree(abuf_r));
4812   PetscCall(PetscFree(ba_i));
4813   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4814   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4815   PetscFunctionReturn(PETSC_SUCCESS);
4816 }
4817 
4818 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4819 {
4820   Mat                  B_mpi;
4821   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4822   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4823   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4824   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4825   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4826   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4827   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4828   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4829   MPI_Status          *status;
4830   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4831   PetscBT              lnkbt;
4832   Mat_Merge_SeqsToMPI *merge;
4833   PetscContainer       container;
4834 
4835   PetscFunctionBegin;
4836   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4837 
4838   /* make sure it is a PETSc comm */
4839   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4840   PetscCallMPI(MPI_Comm_size(comm, &size));
4841   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4842 
4843   PetscCall(PetscNew(&merge));
4844   PetscCall(PetscMalloc1(size, &status));
4845 
4846   /* determine row ownership */
4847   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4848   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4849   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4850   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4851   PetscCall(PetscLayoutSetUp(merge->rowmap));
4852   PetscCall(PetscMalloc1(size, &len_si));
4853   PetscCall(PetscMalloc1(size, &merge->len_s));
4854 
4855   m      = merge->rowmap->n;
4856   owners = merge->rowmap->range;
4857 
4858   /* determine the number of messages to send, their lengths */
4859   len_s = merge->len_s;
4860 
4861   len          = 0; /* length of buf_si[] */
4862   merge->nsend = 0;
4863   for (proc = 0; proc < size; proc++) {
4864     len_si[proc] = 0;
4865     if (proc == rank) {
4866       len_s[proc] = 0;
4867     } else {
4868       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4869       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4870     }
4871     if (len_s[proc]) {
4872       merge->nsend++;
4873       nrows = 0;
4874       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4875         if (ai[i + 1] > ai[i]) nrows++;
4876       }
4877       len_si[proc] = 2 * (nrows + 1);
4878       len += len_si[proc];
4879     }
4880   }
4881 
4882   /* determine the number and length of messages to receive for ij-structure */
4883   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4884   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4885 
4886   /* post the Irecv of j-structure */
4887   PetscCall(PetscCommGetNewTag(comm, &tagj));
4888   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4889 
4890   /* post the Isend of j-structure */
4891   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4892 
4893   for (proc = 0, k = 0; proc < size; proc++) {
4894     if (!len_s[proc]) continue;
4895     i = owners[proc];
4896     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4897     k++;
4898   }
4899 
4900   /* receives and sends of j-structure are complete */
4901   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4902   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4903 
4904   /* send and recv i-structure */
4905   PetscCall(PetscCommGetNewTag(comm, &tagi));
4906   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4907 
4908   PetscCall(PetscMalloc1(len + 1, &buf_s));
4909   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4910   for (proc = 0, k = 0; proc < size; proc++) {
4911     if (!len_s[proc]) continue;
4912     /* form outgoing message for i-structure:
4913          buf_si[0]:                 nrows to be sent
4914                [1:nrows]:           row index (global)
4915                [nrows+1:2*nrows+1]: i-structure index
4916     */
4917     nrows       = len_si[proc] / 2 - 1;
4918     buf_si_i    = buf_si + nrows + 1;
4919     buf_si[0]   = nrows;
4920     buf_si_i[0] = 0;
4921     nrows       = 0;
4922     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4923       anzi = ai[i + 1] - ai[i];
4924       if (anzi) {
4925         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4926         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4927         nrows++;
4928       }
4929     }
4930     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4931     k++;
4932     buf_si += len_si[proc];
4933   }
4934 
4935   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4936   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4937 
4938   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4939   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4940 
4941   PetscCall(PetscFree(len_si));
4942   PetscCall(PetscFree(len_ri));
4943   PetscCall(PetscFree(rj_waits));
4944   PetscCall(PetscFree2(si_waits, sj_waits));
4945   PetscCall(PetscFree(ri_waits));
4946   PetscCall(PetscFree(buf_s));
4947   PetscCall(PetscFree(status));
4948 
4949   /* compute a local seq matrix in each processor */
4950   /* allocate bi array and free space for accumulating nonzero column info */
4951   PetscCall(PetscMalloc1(m + 1, &bi));
4952   bi[0] = 0;
4953 
4954   /* create and initialize a linked list */
4955   nlnk = N + 1;
4956   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4957 
4958   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4959   len = ai[owners[rank + 1]] - ai[owners[rank]];
4960   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4961 
4962   current_space = free_space;
4963 
4964   /* determine symbolic info for each local row */
4965   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4966 
4967   for (k = 0; k < merge->nrecv; k++) {
4968     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4969     nrows       = *buf_ri_k[k];
4970     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4971     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4972   }
4973 
4974   MatPreallocateBegin(comm, m, n, dnz, onz);
4975   len = 0;
4976   for (i = 0; i < m; i++) {
4977     bnzi = 0;
4978     /* add local non-zero cols of this proc's seqmat into lnk */
4979     arow = owners[rank] + i;
4980     anzi = ai[arow + 1] - ai[arow];
4981     aj   = a->j + ai[arow];
4982     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4983     bnzi += nlnk;
4984     /* add received col data into lnk */
4985     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4986       if (i == *nextrow[k]) {            /* i-th row */
4987         anzi = *(nextai[k] + 1) - *nextai[k];
4988         aj   = buf_rj[k] + *nextai[k];
4989         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4990         bnzi += nlnk;
4991         nextrow[k]++;
4992         nextai[k]++;
4993       }
4994     }
4995     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4996 
4997     /* if free space is not available, make more free space */
4998     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
4999     /* copy data into free space, then initialize lnk */
5000     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5001     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5002 
5003     current_space->array += bnzi;
5004     current_space->local_used += bnzi;
5005     current_space->local_remaining -= bnzi;
5006 
5007     bi[i + 1] = bi[i] + bnzi;
5008   }
5009 
5010   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5011 
5012   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5013   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5014   PetscCall(PetscLLDestroy(lnk, lnkbt));
5015 
5016   /* create symbolic parallel matrix B_mpi */
5017   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5018   PetscCall(MatCreate(comm, &B_mpi));
5019   if (n == PETSC_DECIDE) {
5020     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5021   } else {
5022     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5023   }
5024   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5025   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5026   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5027   MatPreallocateEnd(dnz, onz);
5028   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5029 
5030   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5031   B_mpi->assembled = PETSC_FALSE;
5032   merge->bi        = bi;
5033   merge->bj        = bj;
5034   merge->buf_ri    = buf_ri;
5035   merge->buf_rj    = buf_rj;
5036   merge->coi       = NULL;
5037   merge->coj       = NULL;
5038   merge->owners_co = NULL;
5039 
5040   PetscCall(PetscCommDestroy(&comm));
5041 
5042   /* attach the supporting struct to B_mpi for reuse */
5043   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5044   PetscCall(PetscContainerSetPointer(container, merge));
5045   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5046   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5047   PetscCall(PetscContainerDestroy(&container));
5048   *mpimat = B_mpi;
5049 
5050   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5051   PetscFunctionReturn(PETSC_SUCCESS);
5052 }
5053 
5054 /*@C
5055   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5056   matrices from each processor
5057 
5058   Collective
5059 
5060   Input Parameters:
5061 + comm   - the communicator the parallel matrix will live on
5062 . seqmat - the input sequential matrices
5063 . m      - number of local rows (or `PETSC_DECIDE`)
5064 . n      - number of local columns (or `PETSC_DECIDE`)
5065 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5066 
5067   Output Parameter:
5068 . mpimat - the parallel matrix generated
5069 
5070   Level: advanced
5071 
5072   Note:
5073   The dimensions of the sequential matrix in each processor MUST be the same.
5074   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5075   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5076 
5077 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5078 @*/
5079 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5080 {
5081   PetscMPIInt size;
5082 
5083   PetscFunctionBegin;
5084   PetscCallMPI(MPI_Comm_size(comm, &size));
5085   if (size == 1) {
5086     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5087     if (scall == MAT_INITIAL_MATRIX) {
5088       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5089     } else {
5090       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5091     }
5092     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5093     PetscFunctionReturn(PETSC_SUCCESS);
5094   }
5095   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5096   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5097   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5098   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5099   PetscFunctionReturn(PETSC_SUCCESS);
5100 }
5101 
5102 /*@
5103   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5104 
5105   Not Collective
5106 
5107   Input Parameter:
5108 . A - the matrix
5109 
5110   Output Parameter:
5111 . A_loc - the local sequential matrix generated
5112 
5113   Level: developer
5114 
5115   Notes:
5116   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5117   with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
5118   `n` is the global column count obtained with `MatGetSize()`.
5119 
5120   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5121 
5122   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5123 
5124   Destroy the matrix with `MatDestroy()`
5125 
5126 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5127 @*/
5128 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5129 {
5130   PetscBool mpi;
5131 
5132   PetscFunctionBegin;
5133   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5134   if (mpi) {
5135     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5136   } else {
5137     *A_loc = A;
5138     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5139   }
5140   PetscFunctionReturn(PETSC_SUCCESS);
5141 }
5142 
5143 /*@
5144   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5145 
5146   Not Collective
5147 
5148   Input Parameters:
5149 + A     - the matrix
5150 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5151 
5152   Output Parameter:
5153 . A_loc - the local sequential matrix generated
5154 
5155   Level: developer
5156 
5157   Notes:
5158   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5159   matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5160   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5161 
5162   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5163 
5164   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5165   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5166   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5167   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5168 
5169 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5170 @*/
5171 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5172 {
5173   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5174   Mat_SeqAIJ        *mat, *a, *b;
5175   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5176   const PetscScalar *aa, *ba, *aav, *bav;
5177   PetscScalar       *ca, *cam;
5178   PetscMPIInt        size;
5179   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5180   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5181   PetscBool          match;
5182 
5183   PetscFunctionBegin;
5184   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5185   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5186   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5187   if (size == 1) {
5188     if (scall == MAT_INITIAL_MATRIX) {
5189       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5190       *A_loc = mpimat->A;
5191     } else if (scall == MAT_REUSE_MATRIX) {
5192       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5193     }
5194     PetscFunctionReturn(PETSC_SUCCESS);
5195   }
5196 
5197   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5198   a  = (Mat_SeqAIJ *)(mpimat->A)->data;
5199   b  = (Mat_SeqAIJ *)(mpimat->B)->data;
5200   ai = a->i;
5201   aj = a->j;
5202   bi = b->i;
5203   bj = b->j;
5204   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5205   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5206   aa = aav;
5207   ba = bav;
5208   if (scall == MAT_INITIAL_MATRIX) {
5209     PetscCall(PetscMalloc1(1 + am, &ci));
5210     ci[0] = 0;
5211     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5212     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5213     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5214     k = 0;
5215     for (i = 0; i < am; i++) {
5216       ncols_o = bi[i + 1] - bi[i];
5217       ncols_d = ai[i + 1] - ai[i];
5218       /* off-diagonal portion of A */
5219       for (jo = 0; jo < ncols_o; jo++) {
5220         col = cmap[*bj];
5221         if (col >= cstart) break;
5222         cj[k] = col;
5223         bj++;
5224         ca[k++] = *ba++;
5225       }
5226       /* diagonal portion of A */
5227       for (j = 0; j < ncols_d; j++) {
5228         cj[k]   = cstart + *aj++;
5229         ca[k++] = *aa++;
5230       }
5231       /* off-diagonal portion of A */
5232       for (j = jo; j < ncols_o; j++) {
5233         cj[k]   = cmap[*bj++];
5234         ca[k++] = *ba++;
5235       }
5236     }
5237     /* put together the new matrix */
5238     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5239     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5240     /* Since these are PETSc arrays, change flags to free them as necessary. */
5241     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5242     mat->free_a  = PETSC_TRUE;
5243     mat->free_ij = PETSC_TRUE;
5244     mat->nonew   = 0;
5245   } else if (scall == MAT_REUSE_MATRIX) {
5246     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5247     ci  = mat->i;
5248     cj  = mat->j;
5249     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5250     for (i = 0; i < am; i++) {
5251       /* off-diagonal portion of A */
5252       ncols_o = bi[i + 1] - bi[i];
5253       for (jo = 0; jo < ncols_o; jo++) {
5254         col = cmap[*bj];
5255         if (col >= cstart) break;
5256         *cam++ = *ba++;
5257         bj++;
5258       }
5259       /* diagonal portion of A */
5260       ncols_d = ai[i + 1] - ai[i];
5261       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5262       /* off-diagonal portion of A */
5263       for (j = jo; j < ncols_o; j++) {
5264         *cam++ = *ba++;
5265         bj++;
5266       }
5267     }
5268     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5269   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5270   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5271   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5272   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5273   PetscFunctionReturn(PETSC_SUCCESS);
5274 }
5275 
5276 /*@
5277   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5278   mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal part
5279 
5280   Not Collective
5281 
5282   Input Parameters:
5283 + A     - the matrix
5284 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5285 
5286   Output Parameters:
5287 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5288 - A_loc - the local sequential matrix generated
5289 
5290   Level: developer
5291 
5292   Note:
5293   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5294   part, then those associated with the off-diagonal part (in its local ordering)
5295 
5296 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5297 @*/
5298 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5299 {
5300   Mat             Ao, Ad;
5301   const PetscInt *cmap;
5302   PetscMPIInt     size;
5303   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5304 
5305   PetscFunctionBegin;
5306   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5307   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5308   if (size == 1) {
5309     if (scall == MAT_INITIAL_MATRIX) {
5310       PetscCall(PetscObjectReference((PetscObject)Ad));
5311       *A_loc = Ad;
5312     } else if (scall == MAT_REUSE_MATRIX) {
5313       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5314     }
5315     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5316     PetscFunctionReturn(PETSC_SUCCESS);
5317   }
5318   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5319   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5320   if (f) {
5321     PetscCall((*f)(A, scall, glob, A_loc));
5322   } else {
5323     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5324     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5325     Mat_SeqAIJ        *c;
5326     PetscInt          *ai = a->i, *aj = a->j;
5327     PetscInt          *bi = b->i, *bj = b->j;
5328     PetscInt          *ci, *cj;
5329     const PetscScalar *aa, *ba;
5330     PetscScalar       *ca;
5331     PetscInt           i, j, am, dn, on;
5332 
5333     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5334     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5335     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5336     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5337     if (scall == MAT_INITIAL_MATRIX) {
5338       PetscInt k;
5339       PetscCall(PetscMalloc1(1 + am, &ci));
5340       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5341       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5342       ci[0] = 0;
5343       for (i = 0, k = 0; i < am; i++) {
5344         const PetscInt ncols_o = bi[i + 1] - bi[i];
5345         const PetscInt ncols_d = ai[i + 1] - ai[i];
5346         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5347         /* diagonal portion of A */
5348         for (j = 0; j < ncols_d; j++, k++) {
5349           cj[k] = *aj++;
5350           ca[k] = *aa++;
5351         }
5352         /* off-diagonal portion of A */
5353         for (j = 0; j < ncols_o; j++, k++) {
5354           cj[k] = dn + *bj++;
5355           ca[k] = *ba++;
5356         }
5357       }
5358       /* put together the new matrix */
5359       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5360       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5361       /* Since these are PETSc arrays, change flags to free them as necessary. */
5362       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5363       c->free_a  = PETSC_TRUE;
5364       c->free_ij = PETSC_TRUE;
5365       c->nonew   = 0;
5366       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5367     } else if (scall == MAT_REUSE_MATRIX) {
5368       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5369       for (i = 0; i < am; i++) {
5370         const PetscInt ncols_d = ai[i + 1] - ai[i];
5371         const PetscInt ncols_o = bi[i + 1] - bi[i];
5372         /* diagonal portion of A */
5373         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5374         /* off-diagonal portion of A */
5375         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5376       }
5377       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5378     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5379     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5380     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5381     if (glob) {
5382       PetscInt cst, *gidx;
5383 
5384       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5385       PetscCall(PetscMalloc1(dn + on, &gidx));
5386       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5387       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5388       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5389     }
5390   }
5391   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5392   PetscFunctionReturn(PETSC_SUCCESS);
5393 }
5394 
5395 /*@C
5396   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5397 
5398   Not Collective
5399 
5400   Input Parameters:
5401 + A     - the matrix
5402 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5403 . row   - index set of rows to extract (or `NULL`)
5404 - col   - index set of columns to extract (or `NULL`)
5405 
5406   Output Parameter:
5407 . A_loc - the local sequential matrix generated
5408 
5409   Level: developer
5410 
5411 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5412 @*/
5413 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5414 {
5415   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5416   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5417   IS          isrowa, iscola;
5418   Mat        *aloc;
5419   PetscBool   match;
5420 
5421   PetscFunctionBegin;
5422   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5423   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5424   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5425   if (!row) {
5426     start = A->rmap->rstart;
5427     end   = A->rmap->rend;
5428     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5429   } else {
5430     isrowa = *row;
5431   }
5432   if (!col) {
5433     start = A->cmap->rstart;
5434     cmap  = a->garray;
5435     nzA   = a->A->cmap->n;
5436     nzB   = a->B->cmap->n;
5437     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5438     ncols = 0;
5439     for (i = 0; i < nzB; i++) {
5440       if (cmap[i] < start) idx[ncols++] = cmap[i];
5441       else break;
5442     }
5443     imark = i;
5444     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5445     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5446     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5447   } else {
5448     iscola = *col;
5449   }
5450   if (scall != MAT_INITIAL_MATRIX) {
5451     PetscCall(PetscMalloc1(1, &aloc));
5452     aloc[0] = *A_loc;
5453   }
5454   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5455   if (!col) { /* attach global id of condensed columns */
5456     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5457   }
5458   *A_loc = aloc[0];
5459   PetscCall(PetscFree(aloc));
5460   if (!row) PetscCall(ISDestroy(&isrowa));
5461   if (!col) PetscCall(ISDestroy(&iscola));
5462   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5463   PetscFunctionReturn(PETSC_SUCCESS);
5464 }
5465 
5466 /*
5467  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5468  * Row could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5469  * on a global size.
5470  * */
5471 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5472 {
5473   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5474   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
5475   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5476   PetscMPIInt            owner;
5477   PetscSFNode           *iremote, *oiremote;
5478   const PetscInt        *lrowindices;
5479   PetscSF                sf, osf;
5480   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5481   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5482   MPI_Comm               comm;
5483   ISLocalToGlobalMapping mapping;
5484   const PetscScalar     *pd_a, *po_a;
5485 
5486   PetscFunctionBegin;
5487   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5488   /* plocalsize is the number of roots
5489    * nrows is the number of leaves
5490    * */
5491   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5492   PetscCall(ISGetLocalSize(rows, &nrows));
5493   PetscCall(PetscCalloc1(nrows, &iremote));
5494   PetscCall(ISGetIndices(rows, &lrowindices));
5495   for (i = 0; i < nrows; i++) {
5496     /* Find a remote index and an owner for a row
5497      * The row could be local or remote
5498      * */
5499     owner = 0;
5500     lidx  = 0;
5501     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5502     iremote[i].index = lidx;
5503     iremote[i].rank  = owner;
5504   }
5505   /* Create SF to communicate how many nonzero columns for each row */
5506   PetscCall(PetscSFCreate(comm, &sf));
5507   /* SF will figure out the number of nonzero columns for each row, and their
5508    * offsets
5509    * */
5510   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5511   PetscCall(PetscSFSetFromOptions(sf));
5512   PetscCall(PetscSFSetUp(sf));
5513 
5514   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5515   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5516   PetscCall(PetscCalloc1(nrows, &pnnz));
5517   roffsets[0] = 0;
5518   roffsets[1] = 0;
5519   for (i = 0; i < plocalsize; i++) {
5520     /* diagonal */
5521     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5522     /* off-diagonal */
5523     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5524     /* compute offsets so that we relative location for each row */
5525     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5526     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5527   }
5528   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5529   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5530   /* 'r' means root, and 'l' means leaf */
5531   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5532   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5533   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5534   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5535   PetscCall(PetscSFDestroy(&sf));
5536   PetscCall(PetscFree(roffsets));
5537   PetscCall(PetscFree(nrcols));
5538   dntotalcols = 0;
5539   ontotalcols = 0;
5540   ncol        = 0;
5541   for (i = 0; i < nrows; i++) {
5542     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5543     ncol    = PetscMax(pnnz[i], ncol);
5544     /* diagonal */
5545     dntotalcols += nlcols[i * 2 + 0];
5546     /* off-diagonal */
5547     ontotalcols += nlcols[i * 2 + 1];
5548   }
5549   /* We do not need to figure the right number of columns
5550    * since all the calculations will be done by going through the raw data
5551    * */
5552   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5553   PetscCall(MatSetUp(*P_oth));
5554   PetscCall(PetscFree(pnnz));
5555   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5556   /* diagonal */
5557   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5558   /* off-diagonal */
5559   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5560   /* diagonal */
5561   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5562   /* off-diagonal */
5563   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5564   dntotalcols = 0;
5565   ontotalcols = 0;
5566   ntotalcols  = 0;
5567   for (i = 0; i < nrows; i++) {
5568     owner = 0;
5569     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5570     /* Set iremote for diag matrix */
5571     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5572       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5573       iremote[dntotalcols].rank  = owner;
5574       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5575       ilocal[dntotalcols++] = ntotalcols++;
5576     }
5577     /* off-diagonal */
5578     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5579       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5580       oiremote[ontotalcols].rank  = owner;
5581       oilocal[ontotalcols++]      = ntotalcols++;
5582     }
5583   }
5584   PetscCall(ISRestoreIndices(rows, &lrowindices));
5585   PetscCall(PetscFree(loffsets));
5586   PetscCall(PetscFree(nlcols));
5587   PetscCall(PetscSFCreate(comm, &sf));
5588   /* P serves as roots and P_oth is leaves
5589    * Diag matrix
5590    * */
5591   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5592   PetscCall(PetscSFSetFromOptions(sf));
5593   PetscCall(PetscSFSetUp(sf));
5594 
5595   PetscCall(PetscSFCreate(comm, &osf));
5596   /* off-diagonal */
5597   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5598   PetscCall(PetscSFSetFromOptions(osf));
5599   PetscCall(PetscSFSetUp(osf));
5600   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5601   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5602   /* operate on the matrix internal data to save memory */
5603   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5604   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5605   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5606   /* Convert to global indices for diag matrix */
5607   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5608   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5609   /* We want P_oth store global indices */
5610   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5611   /* Use memory scalable approach */
5612   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5613   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5614   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5615   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5616   /* Convert back to local indices */
5617   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5618   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5619   nout = 0;
5620   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5621   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5622   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5623   /* Exchange values */
5624   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5625   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5626   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5627   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5628   /* Stop PETSc from shrinking memory */
5629   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5630   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5631   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5632   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5633   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5634   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5635   PetscCall(PetscSFDestroy(&sf));
5636   PetscCall(PetscSFDestroy(&osf));
5637   PetscFunctionReturn(PETSC_SUCCESS);
5638 }
5639 
5640 /*
5641  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5642  * This supports MPIAIJ and MAIJ
5643  * */
5644 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5645 {
5646   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5647   Mat_SeqAIJ *p_oth;
5648   IS          rows, map;
5649   PetscHMapI  hamp;
5650   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5651   MPI_Comm    comm;
5652   PetscSF     sf, osf;
5653   PetscBool   has;
5654 
5655   PetscFunctionBegin;
5656   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5657   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5658   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5659    *  and then create a submatrix (that often is an overlapping matrix)
5660    * */
5661   if (reuse == MAT_INITIAL_MATRIX) {
5662     /* Use a hash table to figure out unique keys */
5663     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5664     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5665     count = 0;
5666     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5667     for (i = 0; i < a->B->cmap->n; i++) {
5668       key = a->garray[i] / dof;
5669       PetscCall(PetscHMapIHas(hamp, key, &has));
5670       if (!has) {
5671         mapping[i] = count;
5672         PetscCall(PetscHMapISet(hamp, key, count++));
5673       } else {
5674         /* Current 'i' has the same value the previous step */
5675         mapping[i] = count - 1;
5676       }
5677     }
5678     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5679     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5680     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5681     PetscCall(PetscCalloc1(htsize, &rowindices));
5682     off = 0;
5683     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5684     PetscCall(PetscHMapIDestroy(&hamp));
5685     PetscCall(PetscSortInt(htsize, rowindices));
5686     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5687     /* In case, the matrix was already created but users want to recreate the matrix */
5688     PetscCall(MatDestroy(P_oth));
5689     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5690     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5691     PetscCall(ISDestroy(&map));
5692     PetscCall(ISDestroy(&rows));
5693   } else if (reuse == MAT_REUSE_MATRIX) {
5694     /* If matrix was already created, we simply update values using SF objects
5695      * that as attached to the matrix earlier.
5696      */
5697     const PetscScalar *pd_a, *po_a;
5698 
5699     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5700     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5701     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5702     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5703     /* Update values in place */
5704     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5705     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5706     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5707     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5708     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5709     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5710     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5711     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5712   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5713   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5714   PetscFunctionReturn(PETSC_SUCCESS);
5715 }
5716 
5717 /*@C
5718   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5719 
5720   Collective
5721 
5722   Input Parameters:
5723 + A     - the first matrix in `MATMPIAIJ` format
5724 . B     - the second matrix in `MATMPIAIJ` format
5725 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5726 
5727   Output Parameters:
5728 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5729 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5730 - B_seq - the sequential matrix generated
5731 
5732   Level: developer
5733 
5734 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5735 @*/
5736 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5737 {
5738   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5739   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5740   IS          isrowb, iscolb;
5741   Mat        *bseq = NULL;
5742 
5743   PetscFunctionBegin;
5744   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5745              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5746   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5747 
5748   if (scall == MAT_INITIAL_MATRIX) {
5749     start = A->cmap->rstart;
5750     cmap  = a->garray;
5751     nzA   = a->A->cmap->n;
5752     nzB   = a->B->cmap->n;
5753     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5754     ncols = 0;
5755     for (i = 0; i < nzB; i++) { /* row < local row index */
5756       if (cmap[i] < start) idx[ncols++] = cmap[i];
5757       else break;
5758     }
5759     imark = i;
5760     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5761     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5762     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5763     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5764   } else {
5765     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5766     isrowb = *rowb;
5767     iscolb = *colb;
5768     PetscCall(PetscMalloc1(1, &bseq));
5769     bseq[0] = *B_seq;
5770   }
5771   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5772   *B_seq = bseq[0];
5773   PetscCall(PetscFree(bseq));
5774   if (!rowb) {
5775     PetscCall(ISDestroy(&isrowb));
5776   } else {
5777     *rowb = isrowb;
5778   }
5779   if (!colb) {
5780     PetscCall(ISDestroy(&iscolb));
5781   } else {
5782     *colb = iscolb;
5783   }
5784   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5785   PetscFunctionReturn(PETSC_SUCCESS);
5786 }
5787 
5788 /*
5789     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5790     of the OFF-DIAGONAL portion of local A
5791 
5792     Collective
5793 
5794    Input Parameters:
5795 +    A,B - the matrices in `MATMPIAIJ` format
5796 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5797 
5798    Output Parameter:
5799 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5800 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5801 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5802 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5803 
5804     Developer Note:
5805     This directly accesses information inside the VecScatter associated with the matrix-vector product
5806      for this matrix. This is not desirable..
5807 
5808     Level: developer
5809 
5810 */
5811 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5812 {
5813   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5814   Mat_SeqAIJ        *b_oth;
5815   VecScatter         ctx;
5816   MPI_Comm           comm;
5817   const PetscMPIInt *rprocs, *sprocs;
5818   const PetscInt    *srow, *rstarts, *sstarts;
5819   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5820   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5821   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5822   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5823   PetscMPIInt        size, tag, rank, nreqs;
5824 
5825   PetscFunctionBegin;
5826   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5827   PetscCallMPI(MPI_Comm_size(comm, &size));
5828 
5829   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5830              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5831   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5832   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5833 
5834   if (size == 1) {
5835     startsj_s = NULL;
5836     bufa_ptr  = NULL;
5837     *B_oth    = NULL;
5838     PetscFunctionReturn(PETSC_SUCCESS);
5839   }
5840 
5841   ctx = a->Mvctx;
5842   tag = ((PetscObject)ctx)->tag;
5843 
5844   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5845   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5846   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5847   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5848   PetscCall(PetscMalloc1(nreqs, &reqs));
5849   rwaits = reqs;
5850   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5851 
5852   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5853   if (scall == MAT_INITIAL_MATRIX) {
5854     /* i-array */
5855     /*  post receives */
5856     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5857     for (i = 0; i < nrecvs; i++) {
5858       rowlen = rvalues + rstarts[i] * rbs;
5859       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5860       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5861     }
5862 
5863     /* pack the outgoing message */
5864     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5865 
5866     sstartsj[0] = 0;
5867     rstartsj[0] = 0;
5868     len         = 0; /* total length of j or a array to be sent */
5869     if (nsends) {
5870       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5871       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5872     }
5873     for (i = 0; i < nsends; i++) {
5874       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5875       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5876       for (j = 0; j < nrows; j++) {
5877         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5878         for (l = 0; l < sbs; l++) {
5879           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5880 
5881           rowlen[j * sbs + l] = ncols;
5882 
5883           len += ncols;
5884           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5885         }
5886         k++;
5887       }
5888       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5889 
5890       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5891     }
5892     /* recvs and sends of i-array are completed */
5893     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5894     PetscCall(PetscFree(svalues));
5895 
5896     /* allocate buffers for sending j and a arrays */
5897     PetscCall(PetscMalloc1(len + 1, &bufj));
5898     PetscCall(PetscMalloc1(len + 1, &bufa));
5899 
5900     /* create i-array of B_oth */
5901     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5902 
5903     b_othi[0] = 0;
5904     len       = 0; /* total length of j or a array to be received */
5905     k         = 0;
5906     for (i = 0; i < nrecvs; i++) {
5907       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5908       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5909       for (j = 0; j < nrows; j++) {
5910         b_othi[k + 1] = b_othi[k] + rowlen[j];
5911         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5912         k++;
5913       }
5914       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5915     }
5916     PetscCall(PetscFree(rvalues));
5917 
5918     /* allocate space for j and a arrays of B_oth */
5919     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5920     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5921 
5922     /* j-array */
5923     /*  post receives of j-array */
5924     for (i = 0; i < nrecvs; i++) {
5925       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5926       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5927     }
5928 
5929     /* pack the outgoing message j-array */
5930     if (nsends) k = sstarts[0];
5931     for (i = 0; i < nsends; i++) {
5932       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5933       bufJ  = bufj + sstartsj[i];
5934       for (j = 0; j < nrows; j++) {
5935         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5936         for (ll = 0; ll < sbs; ll++) {
5937           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5938           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5939           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5940         }
5941       }
5942       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5943     }
5944 
5945     /* recvs and sends of j-array are completed */
5946     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5947   } else if (scall == MAT_REUSE_MATRIX) {
5948     sstartsj = *startsj_s;
5949     rstartsj = *startsj_r;
5950     bufa     = *bufa_ptr;
5951     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5952     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5953   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5954 
5955   /* a-array */
5956   /*  post receives of a-array */
5957   for (i = 0; i < nrecvs; i++) {
5958     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5959     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5960   }
5961 
5962   /* pack the outgoing message a-array */
5963   if (nsends) k = sstarts[0];
5964   for (i = 0; i < nsends; i++) {
5965     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5966     bufA  = bufa + sstartsj[i];
5967     for (j = 0; j < nrows; j++) {
5968       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5969       for (ll = 0; ll < sbs; ll++) {
5970         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5971         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5972         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5973       }
5974     }
5975     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5976   }
5977   /* recvs and sends of a-array are completed */
5978   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5979   PetscCall(PetscFree(reqs));
5980 
5981   if (scall == MAT_INITIAL_MATRIX) {
5982     /* put together the new matrix */
5983     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5984 
5985     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5986     /* Since these are PETSc arrays, change flags to free them as necessary. */
5987     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5988     b_oth->free_a  = PETSC_TRUE;
5989     b_oth->free_ij = PETSC_TRUE;
5990     b_oth->nonew   = 0;
5991 
5992     PetscCall(PetscFree(bufj));
5993     if (!startsj_s || !bufa_ptr) {
5994       PetscCall(PetscFree2(sstartsj, rstartsj));
5995       PetscCall(PetscFree(bufa_ptr));
5996     } else {
5997       *startsj_s = sstartsj;
5998       *startsj_r = rstartsj;
5999       *bufa_ptr  = bufa;
6000     }
6001   } else if (scall == MAT_REUSE_MATRIX) {
6002     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6003   }
6004 
6005   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6006   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6007   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6008   PetscFunctionReturn(PETSC_SUCCESS);
6009 }
6010 
6011 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6012 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6013 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6014 #if defined(PETSC_HAVE_MKL_SPARSE)
6015 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6016 #endif
6017 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6018 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6019 #if defined(PETSC_HAVE_ELEMENTAL)
6020 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6021 #endif
6022 #if defined(PETSC_HAVE_SCALAPACK)
6023 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6024 #endif
6025 #if defined(PETSC_HAVE_HYPRE)
6026 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6027 #endif
6028 #if defined(PETSC_HAVE_CUDA)
6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6030 #endif
6031 #if defined(PETSC_HAVE_HIP)
6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6033 #endif
6034 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6036 #endif
6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6038 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6039 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6040 
6041 /*
6042     Computes (B'*A')' since computing B*A directly is untenable
6043 
6044                n                       p                          p
6045         [             ]       [             ]         [                 ]
6046       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6047         [             ]       [             ]         [                 ]
6048 
6049 */
6050 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6051 {
6052   Mat At, Bt, Ct;
6053 
6054   PetscFunctionBegin;
6055   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6056   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6057   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6058   PetscCall(MatDestroy(&At));
6059   PetscCall(MatDestroy(&Bt));
6060   PetscCall(MatTransposeSetPrecursor(Ct, C));
6061   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6062   PetscCall(MatDestroy(&Ct));
6063   PetscFunctionReturn(PETSC_SUCCESS);
6064 }
6065 
6066 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6067 {
6068   PetscBool cisdense;
6069 
6070   PetscFunctionBegin;
6071   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6072   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6073   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6074   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6075   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6076   PetscCall(MatSetUp(C));
6077 
6078   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6079   PetscFunctionReturn(PETSC_SUCCESS);
6080 }
6081 
6082 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6083 {
6084   Mat_Product *product = C->product;
6085   Mat          A = product->A, B = product->B;
6086 
6087   PetscFunctionBegin;
6088   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6089              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6090   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6091   C->ops->productsymbolic = MatProductSymbolic_AB;
6092   PetscFunctionReturn(PETSC_SUCCESS);
6093 }
6094 
6095 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6096 {
6097   Mat_Product *product = C->product;
6098 
6099   PetscFunctionBegin;
6100   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6101   PetscFunctionReturn(PETSC_SUCCESS);
6102 }
6103 
6104 /*
6105    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6106 
6107   Input Parameters:
6108 
6109     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6110     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6111 
6112     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6113 
6114     For Set1, j1[] contains column indices of the nonzeros.
6115     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6116     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6117     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6118 
6119     Similar for Set2.
6120 
6121     This routine merges the two sets of nonzeros row by row and removes repeats.
6122 
6123   Output Parameters: (memory is allocated by the caller)
6124 
6125     i[],j[]: the CSR of the merged matrix, which has m rows.
6126     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6127     imap2[]: similar to imap1[], but for Set2.
6128     Note we order nonzeros row-by-row and from left to right.
6129 */
6130 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6131 {
6132   PetscInt   r, m; /* Row index of mat */
6133   PetscCount t, t1, t2, b1, e1, b2, e2;
6134 
6135   PetscFunctionBegin;
6136   PetscCall(MatGetLocalSize(mat, &m, NULL));
6137   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6138   i[0]        = 0;
6139   for (r = 0; r < m; r++) { /* Do row by row merging */
6140     b1 = rowBegin1[r];
6141     e1 = rowEnd1[r];
6142     b2 = rowBegin2[r];
6143     e2 = rowEnd2[r];
6144     while (b1 < e1 && b2 < e2) {
6145       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6146         j[t]      = j1[b1];
6147         imap1[t1] = t;
6148         imap2[t2] = t;
6149         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6150         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6151         t1++;
6152         t2++;
6153         t++;
6154       } else if (j1[b1] < j2[b2]) {
6155         j[t]      = j1[b1];
6156         imap1[t1] = t;
6157         b1 += jmap1[t1 + 1] - jmap1[t1];
6158         t1++;
6159         t++;
6160       } else {
6161         j[t]      = j2[b2];
6162         imap2[t2] = t;
6163         b2 += jmap2[t2 + 1] - jmap2[t2];
6164         t2++;
6165         t++;
6166       }
6167     }
6168     /* Merge the remaining in either j1[] or j2[] */
6169     while (b1 < e1) {
6170       j[t]      = j1[b1];
6171       imap1[t1] = t;
6172       b1 += jmap1[t1 + 1] - jmap1[t1];
6173       t1++;
6174       t++;
6175     }
6176     while (b2 < e2) {
6177       j[t]      = j2[b2];
6178       imap2[t2] = t;
6179       b2 += jmap2[t2 + 1] - jmap2[t2];
6180       t2++;
6181       t++;
6182     }
6183     i[r + 1] = t;
6184   }
6185   PetscFunctionReturn(PETSC_SUCCESS);
6186 }
6187 
6188 /*
6189   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6190 
6191   Input Parameters:
6192     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6193     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6194       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6195 
6196       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6197       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6198 
6199   Output Parameters:
6200     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6201     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6202       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6203       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6204 
6205     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6206       Atot: number of entries belonging to the diagonal block.
6207       Annz: number of unique nonzeros belonging to the diagonal block.
6208       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6209         repeats (i.e., same 'i,j' pair).
6210       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6211         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6212 
6213       Atot: number of entries belonging to the diagonal block
6214       Annz: number of unique nonzeros belonging to the diagonal block.
6215 
6216     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6217 
6218     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6219 */
6220 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6221 {
6222   PetscInt    cstart, cend, rstart, rend, row, col;
6223   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6224   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6225   PetscCount  k, m, p, q, r, s, mid;
6226   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6227 
6228   PetscFunctionBegin;
6229   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6230   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6231   m = rend - rstart;
6232 
6233   /* Skip negative rows */
6234   for (k = 0; k < n; k++)
6235     if (i[k] >= 0) break;
6236 
6237   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6238      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6239   */
6240   while (k < n) {
6241     row = i[k];
6242     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6243     for (s = k; s < n; s++)
6244       if (i[s] != row) break;
6245 
6246     /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
6247     for (p = k; p < s; p++) {
6248       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
6249       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6250     }
6251     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6252     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6253     rowBegin[row - rstart] = k;
6254     rowMid[row - rstart]   = mid;
6255     rowEnd[row - rstart]   = s;
6256 
6257     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6258     Atot += mid - k;
6259     Btot += s - mid;
6260 
6261     /* Count unique nonzeros of this diag row */
6262     for (p = k; p < mid;) {
6263       col = j[p];
6264       do {
6265         j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
6266         p++;
6267       } while (p < mid && j[p] == col);
6268       Annz++;
6269     }
6270 
6271     /* Count unique nonzeros of this offdiag row */
6272     for (p = mid; p < s;) {
6273       col = j[p];
6274       do {
6275         p++;
6276       } while (p < s && j[p] == col);
6277       Bnnz++;
6278     }
6279     k = s;
6280   }
6281 
6282   /* Allocation according to Atot, Btot, Annz, Bnnz */
6283   PetscCall(PetscMalloc1(Atot, &Aperm));
6284   PetscCall(PetscMalloc1(Btot, &Bperm));
6285   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6286   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6287 
6288   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6289   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6290   for (r = 0; r < m; r++) {
6291     k   = rowBegin[r];
6292     mid = rowMid[r];
6293     s   = rowEnd[r];
6294     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6295     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6296     Atot += mid - k;
6297     Btot += s - mid;
6298 
6299     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6300     for (p = k; p < mid;) {
6301       col = j[p];
6302       q   = p;
6303       do {
6304         p++;
6305       } while (p < mid && j[p] == col);
6306       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6307       Annz++;
6308     }
6309 
6310     for (p = mid; p < s;) {
6311       col = j[p];
6312       q   = p;
6313       do {
6314         p++;
6315       } while (p < s && j[p] == col);
6316       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6317       Bnnz++;
6318     }
6319   }
6320   /* Output */
6321   *Aperm_ = Aperm;
6322   *Annz_  = Annz;
6323   *Atot_  = Atot;
6324   *Ajmap_ = Ajmap;
6325   *Bperm_ = Bperm;
6326   *Bnnz_  = Bnnz;
6327   *Btot_  = Btot;
6328   *Bjmap_ = Bjmap;
6329   PetscFunctionReturn(PETSC_SUCCESS);
6330 }
6331 
6332 /*
6333   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6334 
6335   Input Parameters:
6336     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6337     nnz:  number of unique nonzeros in the merged matrix
6338     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6339     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6340 
6341   Output Parameter: (memory is allocated by the caller)
6342     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6343 
6344   Example:
6345     nnz1 = 4
6346     nnz  = 6
6347     imap = [1,3,4,5]
6348     jmap = [0,3,5,6,7]
6349    then,
6350     jmap_new = [0,0,3,3,5,6,7]
6351 */
6352 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6353 {
6354   PetscCount k, p;
6355 
6356   PetscFunctionBegin;
6357   jmap_new[0] = 0;
6358   p           = nnz;                /* p loops over jmap_new[] backwards */
6359   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6360     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6361   }
6362   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6363   PetscFunctionReturn(PETSC_SUCCESS);
6364 }
6365 
6366 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6367 {
6368   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6369 
6370   PetscFunctionBegin;
6371   PetscCall(PetscSFDestroy(&coo->sf));
6372   PetscCall(PetscFree(coo->Aperm1));
6373   PetscCall(PetscFree(coo->Bperm1));
6374   PetscCall(PetscFree(coo->Ajmap1));
6375   PetscCall(PetscFree(coo->Bjmap1));
6376   PetscCall(PetscFree(coo->Aimap2));
6377   PetscCall(PetscFree(coo->Bimap2));
6378   PetscCall(PetscFree(coo->Aperm2));
6379   PetscCall(PetscFree(coo->Bperm2));
6380   PetscCall(PetscFree(coo->Ajmap2));
6381   PetscCall(PetscFree(coo->Bjmap2));
6382   PetscCall(PetscFree(coo->Cperm1));
6383   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6384   PetscCall(PetscFree(coo));
6385   PetscFunctionReturn(PETSC_SUCCESS);
6386 }
6387 
6388 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6389 {
6390   MPI_Comm             comm;
6391   PetscMPIInt          rank, size;
6392   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6393   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6394   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6395   PetscContainer       container;
6396   MatCOOStruct_MPIAIJ *coo;
6397 
6398   PetscFunctionBegin;
6399   PetscCall(PetscFree(mpiaij->garray));
6400   PetscCall(VecDestroy(&mpiaij->lvec));
6401 #if defined(PETSC_USE_CTABLE)
6402   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6403 #else
6404   PetscCall(PetscFree(mpiaij->colmap));
6405 #endif
6406   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6407   mat->assembled     = PETSC_FALSE;
6408   mat->was_assembled = PETSC_FALSE;
6409 
6410   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6411   PetscCallMPI(MPI_Comm_size(comm, &size));
6412   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6413   PetscCall(PetscLayoutSetUp(mat->rmap));
6414   PetscCall(PetscLayoutSetUp(mat->cmap));
6415   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6416   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6417   PetscCall(MatGetLocalSize(mat, &m, &n));
6418   PetscCall(MatGetSize(mat, &M, &N));
6419 
6420   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6421   /* entries come first, then local rows, then remote rows.                     */
6422   PetscCount n1 = coo_n, *perm1;
6423   PetscInt  *i1 = coo_i, *j1 = coo_j;
6424 
6425   PetscCall(PetscMalloc1(n1, &perm1));
6426   for (k = 0; k < n1; k++) perm1[k] = k;
6427 
6428   /* Manipulate indices so that entries with negative row or col indices will have smallest
6429      row indices, local entries will have greater but negative row indices, and remote entries
6430      will have positive row indices.
6431   */
6432   for (k = 0; k < n1; k++) {
6433     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6434     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6435     else {
6436       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6437       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6438     }
6439   }
6440 
6441   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6442   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6443 
6444   /* Advance k to the first entry we need to take care of */
6445   for (k = 0; k < n1; k++)
6446     if (i1[k] > PETSC_MIN_INT) break;
6447   PetscInt i1start = k;
6448 
6449   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6450   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6451 
6452   /*           Send remote rows to their owner                                  */
6453   /* Find which rows should be sent to which remote ranks*/
6454   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6455   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6456   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6457   const PetscInt *ranges;
6458   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6459 
6460   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6461   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6462   for (k = rem; k < n1;) {
6463     PetscMPIInt owner;
6464     PetscInt    firstRow, lastRow;
6465 
6466     /* Locate a row range */
6467     firstRow = i1[k]; /* first row of this owner */
6468     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6469     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6470 
6471     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6472     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6473 
6474     /* All entries in [k,p) belong to this remote owner */
6475     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6476       PetscMPIInt *sendto2;
6477       PetscInt    *nentries2;
6478       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6479 
6480       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6481       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6482       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6483       PetscCall(PetscFree2(sendto, nentries2));
6484       sendto   = sendto2;
6485       nentries = nentries2;
6486       maxNsend = maxNsend2;
6487     }
6488     sendto[nsend]   = owner;
6489     nentries[nsend] = p - k;
6490     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6491     nsend++;
6492     k = p;
6493   }
6494 
6495   /* Build 1st SF to know offsets on remote to send data */
6496   PetscSF      sf1;
6497   PetscInt     nroots = 1, nroots2 = 0;
6498   PetscInt     nleaves = nsend, nleaves2 = 0;
6499   PetscInt    *offsets;
6500   PetscSFNode *iremote;
6501 
6502   PetscCall(PetscSFCreate(comm, &sf1));
6503   PetscCall(PetscMalloc1(nsend, &iremote));
6504   PetscCall(PetscMalloc1(nsend, &offsets));
6505   for (k = 0; k < nsend; k++) {
6506     iremote[k].rank  = sendto[k];
6507     iremote[k].index = 0;
6508     nleaves2 += nentries[k];
6509     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6510   }
6511   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6512   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6513   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6514   PetscCall(PetscSFDestroy(&sf1));
6515   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6516 
6517   /* Build 2nd SF to send remote COOs to their owner */
6518   PetscSF sf2;
6519   nroots  = nroots2;
6520   nleaves = nleaves2;
6521   PetscCall(PetscSFCreate(comm, &sf2));
6522   PetscCall(PetscSFSetFromOptions(sf2));
6523   PetscCall(PetscMalloc1(nleaves, &iremote));
6524   p = 0;
6525   for (k = 0; k < nsend; k++) {
6526     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6527     for (q = 0; q < nentries[k]; q++, p++) {
6528       iremote[p].rank  = sendto[k];
6529       iremote[p].index = offsets[k] + q;
6530     }
6531   }
6532   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6533 
6534   /* Send the remote COOs to their owner */
6535   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6536   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6537   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6538   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6539   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6540   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6541   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6542 
6543   PetscCall(PetscFree(offsets));
6544   PetscCall(PetscFree2(sendto, nentries));
6545 
6546   /* Sort received COOs by row along with the permutation array     */
6547   for (k = 0; k < n2; k++) perm2[k] = k;
6548   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6549 
6550   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6551   PetscCount *Cperm1;
6552   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6553   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves));
6554 
6555   /* Support for HYPRE matrices, kind of a hack.
6556      Swap min column with diagonal so that diagonal values will go first */
6557   PetscBool   hypre;
6558   const char *name;
6559   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6560   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6561   if (hypre) {
6562     PetscInt *minj;
6563     PetscBT   hasdiag;
6564 
6565     PetscCall(PetscBTCreate(m, &hasdiag));
6566     PetscCall(PetscMalloc1(m, &minj));
6567     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6568     for (k = i1start; k < rem; k++) {
6569       if (j1[k] < cstart || j1[k] >= cend) continue;
6570       const PetscInt rindex = i1[k] - rstart;
6571       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6572       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6573     }
6574     for (k = 0; k < n2; k++) {
6575       if (j2[k] < cstart || j2[k] >= cend) continue;
6576       const PetscInt rindex = i2[k] - rstart;
6577       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6578       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6579     }
6580     for (k = i1start; k < rem; k++) {
6581       const PetscInt rindex = i1[k] - rstart;
6582       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6583       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6584       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6585     }
6586     for (k = 0; k < n2; k++) {
6587       const PetscInt rindex = i2[k] - rstart;
6588       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6589       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6590       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6591     }
6592     PetscCall(PetscBTDestroy(&hasdiag));
6593     PetscCall(PetscFree(minj));
6594   }
6595 
6596   /* Split local COOs and received COOs into diag/offdiag portions */
6597   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6598   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6599   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6600   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6601   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6602   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6603 
6604   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6605   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6606   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6607   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6608 
6609   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6610   PetscInt *Ai, *Bi;
6611   PetscInt *Aj, *Bj;
6612 
6613   PetscCall(PetscMalloc1(m + 1, &Ai));
6614   PetscCall(PetscMalloc1(m + 1, &Bi));
6615   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6616   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6617 
6618   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6619   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6620   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6621   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6622   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6623 
6624   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6625   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6626 
6627   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6628   /* expect nonzeros in A/B most likely have local contributing entries        */
6629   PetscInt    Annz = Ai[m];
6630   PetscInt    Bnnz = Bi[m];
6631   PetscCount *Ajmap1_new, *Bjmap1_new;
6632 
6633   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6634   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6635 
6636   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6637   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6638 
6639   PetscCall(PetscFree(Aimap1));
6640   PetscCall(PetscFree(Ajmap1));
6641   PetscCall(PetscFree(Bimap1));
6642   PetscCall(PetscFree(Bjmap1));
6643   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6644   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6645   PetscCall(PetscFree(perm1));
6646   PetscCall(PetscFree3(i2, j2, perm2));
6647 
6648   Ajmap1 = Ajmap1_new;
6649   Bjmap1 = Bjmap1_new;
6650 
6651   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6652   if (Annz < Annz1 + Annz2) {
6653     PetscInt *Aj_new;
6654     PetscCall(PetscMalloc1(Annz, &Aj_new));
6655     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6656     PetscCall(PetscFree(Aj));
6657     Aj = Aj_new;
6658   }
6659 
6660   if (Bnnz < Bnnz1 + Bnnz2) {
6661     PetscInt *Bj_new;
6662     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6663     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6664     PetscCall(PetscFree(Bj));
6665     Bj = Bj_new;
6666   }
6667 
6668   /* Create new submatrices for on-process and off-process coupling                  */
6669   PetscScalar     *Aa, *Ba;
6670   MatType          rtype;
6671   Mat_SeqAIJ      *a, *b;
6672   PetscObjectState state;
6673   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6674   PetscCall(PetscCalloc1(Bnnz, &Ba));
6675   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6676   if (cstart) {
6677     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6678   }
6679   PetscCall(MatDestroy(&mpiaij->A));
6680   PetscCall(MatDestroy(&mpiaij->B));
6681   PetscCall(MatGetRootType_Private(mat, &rtype));
6682   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6683   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6684   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6685   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6686   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6687   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6688 
6689   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6690   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6691   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6692   a->free_a = b->free_a = PETSC_TRUE;
6693   a->free_ij = b->free_ij = PETSC_TRUE;
6694 
6695   /* conversion must happen AFTER multiply setup */
6696   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6697   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6698   PetscCall(VecDestroy(&mpiaij->lvec));
6699   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6700 
6701   // Put the COO struct in a container and then attach that to the matrix
6702   PetscCall(PetscMalloc1(1, &coo));
6703   coo->n       = coo_n;
6704   coo->sf      = sf2;
6705   coo->sendlen = nleaves;
6706   coo->recvlen = nroots;
6707   coo->Annz    = Annz;
6708   coo->Bnnz    = Bnnz;
6709   coo->Annz2   = Annz2;
6710   coo->Bnnz2   = Bnnz2;
6711   coo->Atot1   = Atot1;
6712   coo->Atot2   = Atot2;
6713   coo->Btot1   = Btot1;
6714   coo->Btot2   = Btot2;
6715   coo->Ajmap1  = Ajmap1;
6716   coo->Aperm1  = Aperm1;
6717   coo->Bjmap1  = Bjmap1;
6718   coo->Bperm1  = Bperm1;
6719   coo->Aimap2  = Aimap2;
6720   coo->Ajmap2  = Ajmap2;
6721   coo->Aperm2  = Aperm2;
6722   coo->Bimap2  = Bimap2;
6723   coo->Bjmap2  = Bjmap2;
6724   coo->Bperm2  = Bperm2;
6725   coo->Cperm1  = Cperm1;
6726   // Allocate in preallocation. If not used, it has zero cost on host
6727   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6728   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6729   PetscCall(PetscContainerSetPointer(container, coo));
6730   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6731   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6732   PetscCall(PetscContainerDestroy(&container));
6733   PetscFunctionReturn(PETSC_SUCCESS);
6734 }
6735 
6736 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6737 {
6738   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6739   Mat                  A = mpiaij->A, B = mpiaij->B;
6740   PetscScalar         *Aa, *Ba;
6741   PetscScalar         *sendbuf, *recvbuf;
6742   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6743   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6744   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6745   const PetscCount    *Cperm1;
6746   PetscContainer       container;
6747   MatCOOStruct_MPIAIJ *coo;
6748 
6749   PetscFunctionBegin;
6750   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6751   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6752   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6753   sendbuf = coo->sendbuf;
6754   recvbuf = coo->recvbuf;
6755   Ajmap1  = coo->Ajmap1;
6756   Ajmap2  = coo->Ajmap2;
6757   Aimap2  = coo->Aimap2;
6758   Bjmap1  = coo->Bjmap1;
6759   Bjmap2  = coo->Bjmap2;
6760   Bimap2  = coo->Bimap2;
6761   Aperm1  = coo->Aperm1;
6762   Aperm2  = coo->Aperm2;
6763   Bperm1  = coo->Bperm1;
6764   Bperm2  = coo->Bperm2;
6765   Cperm1  = coo->Cperm1;
6766 
6767   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6768   PetscCall(MatSeqAIJGetArray(B, &Ba));
6769 
6770   /* Pack entries to be sent to remote */
6771   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6772 
6773   /* Send remote entries to their owner and overlap the communication with local computation */
6774   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6775   /* Add local entries to A and B */
6776   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6777     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6778     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6779     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6780   }
6781   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6782     PetscScalar sum = 0.0;
6783     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6784     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6785   }
6786   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6787 
6788   /* Add received remote entries to A and B */
6789   for (PetscCount i = 0; i < coo->Annz2; i++) {
6790     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6791   }
6792   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6793     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6794   }
6795   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6796   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6797   PetscFunctionReturn(PETSC_SUCCESS);
6798 }
6799 
6800 /*MC
6801    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6802 
6803    Options Database Keys:
6804 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6805 
6806    Level: beginner
6807 
6808    Notes:
6809    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6810     in this case the values associated with the rows and columns one passes in are set to zero
6811     in the matrix
6812 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6815 
6816 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6817 M*/
6818 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6819 {
6820   Mat_MPIAIJ *b;
6821   PetscMPIInt size;
6822 
6823   PetscFunctionBegin;
6824   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6825 
6826   PetscCall(PetscNew(&b));
6827   B->data       = (void *)b;
6828   B->ops[0]     = MatOps_Values;
6829   B->assembled  = PETSC_FALSE;
6830   B->insertmode = NOT_SET_VALUES;
6831   b->size       = size;
6832 
6833   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6834 
6835   /* build cache for off array entries formed */
6836   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6837 
6838   b->donotstash  = PETSC_FALSE;
6839   b->colmap      = NULL;
6840   b->garray      = NULL;
6841   b->roworiented = PETSC_TRUE;
6842 
6843   /* stuff used for matrix vector multiply */
6844   b->lvec  = NULL;
6845   b->Mvctx = NULL;
6846 
6847   /* stuff for MatGetRow() */
6848   b->rowindices   = NULL;
6849   b->rowvalues    = NULL;
6850   b->getrowactive = PETSC_FALSE;
6851 
6852   /* flexible pointer used in CUSPARSE classes */
6853   b->spptr = NULL;
6854 
6855   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6856   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6857   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6858   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6859   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6860   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6861   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6862   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6863   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6864   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6865 #if defined(PETSC_HAVE_CUDA)
6866   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6867 #endif
6868 #if defined(PETSC_HAVE_HIP)
6869   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6870 #endif
6871 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6872   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6873 #endif
6874 #if defined(PETSC_HAVE_MKL_SPARSE)
6875   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6876 #endif
6877   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6878   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6879   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6880   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6881 #if defined(PETSC_HAVE_ELEMENTAL)
6882   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6883 #endif
6884 #if defined(PETSC_HAVE_SCALAPACK)
6885   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6886 #endif
6887   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6888   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6889 #if defined(PETSC_HAVE_HYPRE)
6890   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6891   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6892 #endif
6893   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6894   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6895   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6896   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6897   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6898   PetscFunctionReturn(PETSC_SUCCESS);
6899 }
6900 
6901 /*@C
6902   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6903   and "off-diagonal" part of the matrix in CSR format.
6904 
6905   Collective
6906 
6907   Input Parameters:
6908 + comm - MPI communicator
6909 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6910 . n    - This value should be the same as the local size used in creating the
6911        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6912        calculated if `N` is given) For square matrices `n` is almost always `m`.
6913 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6914 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6915 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6916 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6917 . a    - matrix values
6918 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6919 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6920 - oa   - matrix values
6921 
6922   Output Parameter:
6923 . mat - the matrix
6924 
6925   Level: advanced
6926 
6927   Notes:
6928   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6929   must free the arrays once the matrix has been destroyed and not before.
6930 
6931   The `i` and `j` indices are 0 based
6932 
6933   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6934 
6935   This sets local rows and cannot be used to set off-processor values.
6936 
6937   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6938   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6939   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6940   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6941   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6942   communication if it is known that only local entries will be set.
6943 
6944 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6945           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6946 @*/
6947 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6948 {
6949   Mat_MPIAIJ *maij;
6950 
6951   PetscFunctionBegin;
6952   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6953   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6954   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6955   PetscCall(MatCreate(comm, mat));
6956   PetscCall(MatSetSizes(*mat, m, n, M, N));
6957   PetscCall(MatSetType(*mat, MATMPIAIJ));
6958   maij = (Mat_MPIAIJ *)(*mat)->data;
6959 
6960   (*mat)->preallocated = PETSC_TRUE;
6961 
6962   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6963   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6964 
6965   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6966   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6967 
6968   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6969   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6970   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6971   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6972   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6973   PetscFunctionReturn(PETSC_SUCCESS);
6974 }
6975 
6976 typedef struct {
6977   Mat       *mp;    /* intermediate products */
6978   PetscBool *mptmp; /* is the intermediate product temporary ? */
6979   PetscInt   cp;    /* number of intermediate products */
6980 
6981   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6982   PetscInt    *startsj_s, *startsj_r;
6983   PetscScalar *bufa;
6984   Mat          P_oth;
6985 
6986   /* may take advantage of merging product->B */
6987   Mat Bloc; /* B-local by merging diag and off-diag */
6988 
6989   /* cusparse does not have support to split between symbolic and numeric phases.
6990      When api_user is true, we don't need to update the numerical values
6991      of the temporary storage */
6992   PetscBool reusesym;
6993 
6994   /* support for COO values insertion */
6995   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6996   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
6997   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
6998   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6999   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7000   PetscMemType mtype;
7001 
7002   /* customization */
7003   PetscBool abmerge;
7004   PetscBool P_oth_bind;
7005 } MatMatMPIAIJBACKEND;
7006 
7007 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7008 {
7009   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7010   PetscInt             i;
7011 
7012   PetscFunctionBegin;
7013   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7014   PetscCall(PetscFree(mmdata->bufa));
7015   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7016   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7017   PetscCall(MatDestroy(&mmdata->P_oth));
7018   PetscCall(MatDestroy(&mmdata->Bloc));
7019   PetscCall(PetscSFDestroy(&mmdata->sf));
7020   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7021   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7022   PetscCall(PetscFree(mmdata->own[0]));
7023   PetscCall(PetscFree(mmdata->own));
7024   PetscCall(PetscFree(mmdata->off[0]));
7025   PetscCall(PetscFree(mmdata->off));
7026   PetscCall(PetscFree(mmdata));
7027   PetscFunctionReturn(PETSC_SUCCESS);
7028 }
7029 
7030 /* Copy selected n entries with indices in idx[] of A to v[].
7031    If idx is NULL, copy the whole data array of A to v[]
7032  */
7033 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7034 {
7035   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7036 
7037   PetscFunctionBegin;
7038   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7039   if (f) {
7040     PetscCall((*f)(A, n, idx, v));
7041   } else {
7042     const PetscScalar *vv;
7043 
7044     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7045     if (n && idx) {
7046       PetscScalar    *w  = v;
7047       const PetscInt *oi = idx;
7048       PetscInt        j;
7049 
7050       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7051     } else {
7052       PetscCall(PetscArraycpy(v, vv, n));
7053     }
7054     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7055   }
7056   PetscFunctionReturn(PETSC_SUCCESS);
7057 }
7058 
7059 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7060 {
7061   MatMatMPIAIJBACKEND *mmdata;
7062   PetscInt             i, n_d, n_o;
7063 
7064   PetscFunctionBegin;
7065   MatCheckProduct(C, 1);
7066   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7067   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7068   if (!mmdata->reusesym) { /* update temporary matrices */
7069     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7070     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7071   }
7072   mmdata->reusesym = PETSC_FALSE;
7073 
7074   for (i = 0; i < mmdata->cp; i++) {
7075     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7076     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7077   }
7078   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7079     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7080 
7081     if (mmdata->mptmp[i]) continue;
7082     if (noff) {
7083       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7084 
7085       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7086       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7087       n_o += noff;
7088       n_d += nown;
7089     } else {
7090       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7091 
7092       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7093       n_d += mm->nz;
7094     }
7095   }
7096   if (mmdata->hasoffproc) { /* offprocess insertion */
7097     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7098     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7099   }
7100   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7101   PetscFunctionReturn(PETSC_SUCCESS);
7102 }
7103 
7104 /* Support for Pt * A, A * P, or Pt * A * P */
7105 #define MAX_NUMBER_INTERMEDIATE 4
7106 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7107 {
7108   Mat_Product           *product = C->product;
7109   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7110   Mat_MPIAIJ            *a, *p;
7111   MatMatMPIAIJBACKEND   *mmdata;
7112   ISLocalToGlobalMapping P_oth_l2g = NULL;
7113   IS                     glob      = NULL;
7114   const char            *prefix;
7115   char                   pprefix[256];
7116   const PetscInt        *globidx, *P_oth_idx;
7117   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7118   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7119   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7120                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7121                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7122   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7123 
7124   MatProductType ptype;
7125   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7126   PetscMPIInt    size;
7127 
7128   PetscFunctionBegin;
7129   MatCheckProduct(C, 1);
7130   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7131   ptype = product->type;
7132   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7133     ptype                                          = MATPRODUCT_AB;
7134     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7135   }
7136   switch (ptype) {
7137   case MATPRODUCT_AB:
7138     A          = product->A;
7139     P          = product->B;
7140     m          = A->rmap->n;
7141     n          = P->cmap->n;
7142     M          = A->rmap->N;
7143     N          = P->cmap->N;
7144     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7145     break;
7146   case MATPRODUCT_AtB:
7147     P          = product->A;
7148     A          = product->B;
7149     m          = P->cmap->n;
7150     n          = A->cmap->n;
7151     M          = P->cmap->N;
7152     N          = A->cmap->N;
7153     hasoffproc = PETSC_TRUE;
7154     break;
7155   case MATPRODUCT_PtAP:
7156     A          = product->A;
7157     P          = product->B;
7158     m          = P->cmap->n;
7159     n          = P->cmap->n;
7160     M          = P->cmap->N;
7161     N          = P->cmap->N;
7162     hasoffproc = PETSC_TRUE;
7163     break;
7164   default:
7165     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7166   }
7167   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7168   if (size == 1) hasoffproc = PETSC_FALSE;
7169 
7170   /* defaults */
7171   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7172     mp[i]    = NULL;
7173     mptmp[i] = PETSC_FALSE;
7174     rmapt[i] = -1;
7175     cmapt[i] = -1;
7176     rmapa[i] = NULL;
7177     cmapa[i] = NULL;
7178   }
7179 
7180   /* customization */
7181   PetscCall(PetscNew(&mmdata));
7182   mmdata->reusesym = product->api_user;
7183   if (ptype == MATPRODUCT_AB) {
7184     if (product->api_user) {
7185       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7186       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7187       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7188       PetscOptionsEnd();
7189     } else {
7190       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7191       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7192       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7193       PetscOptionsEnd();
7194     }
7195   } else if (ptype == MATPRODUCT_PtAP) {
7196     if (product->api_user) {
7197       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7198       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7199       PetscOptionsEnd();
7200     } else {
7201       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7202       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7203       PetscOptionsEnd();
7204     }
7205   }
7206   a = (Mat_MPIAIJ *)A->data;
7207   p = (Mat_MPIAIJ *)P->data;
7208   PetscCall(MatSetSizes(C, m, n, M, N));
7209   PetscCall(PetscLayoutSetUp(C->rmap));
7210   PetscCall(PetscLayoutSetUp(C->cmap));
7211   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7212   PetscCall(MatGetOptionsPrefix(C, &prefix));
7213 
7214   cp = 0;
7215   switch (ptype) {
7216   case MATPRODUCT_AB: /* A * P */
7217     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7218 
7219     /* A_diag * P_local (merged or not) */
7220     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7221       /* P is product->B */
7222       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7223       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7224       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7225       PetscCall(MatProductSetFill(mp[cp], product->fill));
7226       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7227       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7228       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7229       mp[cp]->product->api_user = product->api_user;
7230       PetscCall(MatProductSetFromOptions(mp[cp]));
7231       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7232       PetscCall(ISGetIndices(glob, &globidx));
7233       rmapt[cp] = 1;
7234       cmapt[cp] = 2;
7235       cmapa[cp] = globidx;
7236       mptmp[cp] = PETSC_FALSE;
7237       cp++;
7238     } else { /* A_diag * P_diag and A_diag * P_off */
7239       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7240       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7241       PetscCall(MatProductSetFill(mp[cp], product->fill));
7242       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7243       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7244       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7245       mp[cp]->product->api_user = product->api_user;
7246       PetscCall(MatProductSetFromOptions(mp[cp]));
7247       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7248       rmapt[cp] = 1;
7249       cmapt[cp] = 1;
7250       mptmp[cp] = PETSC_FALSE;
7251       cp++;
7252       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7253       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7254       PetscCall(MatProductSetFill(mp[cp], product->fill));
7255       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7256       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7257       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7258       mp[cp]->product->api_user = product->api_user;
7259       PetscCall(MatProductSetFromOptions(mp[cp]));
7260       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7261       rmapt[cp] = 1;
7262       cmapt[cp] = 2;
7263       cmapa[cp] = p->garray;
7264       mptmp[cp] = PETSC_FALSE;
7265       cp++;
7266     }
7267 
7268     /* A_off * P_other */
7269     if (mmdata->P_oth) {
7270       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7271       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7272       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7273       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7274       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7275       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7276       PetscCall(MatProductSetFill(mp[cp], product->fill));
7277       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7278       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7279       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7280       mp[cp]->product->api_user = product->api_user;
7281       PetscCall(MatProductSetFromOptions(mp[cp]));
7282       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7283       rmapt[cp] = 1;
7284       cmapt[cp] = 2;
7285       cmapa[cp] = P_oth_idx;
7286       mptmp[cp] = PETSC_FALSE;
7287       cp++;
7288     }
7289     break;
7290 
7291   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7292     /* A is product->B */
7293     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7294     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7295       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7296       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7297       PetscCall(MatProductSetFill(mp[cp], product->fill));
7298       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7299       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7300       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7301       mp[cp]->product->api_user = product->api_user;
7302       PetscCall(MatProductSetFromOptions(mp[cp]));
7303       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7304       PetscCall(ISGetIndices(glob, &globidx));
7305       rmapt[cp] = 2;
7306       rmapa[cp] = globidx;
7307       cmapt[cp] = 2;
7308       cmapa[cp] = globidx;
7309       mptmp[cp] = PETSC_FALSE;
7310       cp++;
7311     } else {
7312       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7313       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7314       PetscCall(MatProductSetFill(mp[cp], product->fill));
7315       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7316       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7317       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7318       mp[cp]->product->api_user = product->api_user;
7319       PetscCall(MatProductSetFromOptions(mp[cp]));
7320       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7321       PetscCall(ISGetIndices(glob, &globidx));
7322       rmapt[cp] = 1;
7323       cmapt[cp] = 2;
7324       cmapa[cp] = globidx;
7325       mptmp[cp] = PETSC_FALSE;
7326       cp++;
7327       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7328       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7329       PetscCall(MatProductSetFill(mp[cp], product->fill));
7330       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7331       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7332       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7333       mp[cp]->product->api_user = product->api_user;
7334       PetscCall(MatProductSetFromOptions(mp[cp]));
7335       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7336       rmapt[cp] = 2;
7337       rmapa[cp] = p->garray;
7338       cmapt[cp] = 2;
7339       cmapa[cp] = globidx;
7340       mptmp[cp] = PETSC_FALSE;
7341       cp++;
7342     }
7343     break;
7344   case MATPRODUCT_PtAP:
7345     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7346     /* P is product->B */
7347     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7348     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7349     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7350     PetscCall(MatProductSetFill(mp[cp], product->fill));
7351     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7352     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7353     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7354     mp[cp]->product->api_user = product->api_user;
7355     PetscCall(MatProductSetFromOptions(mp[cp]));
7356     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7357     PetscCall(ISGetIndices(glob, &globidx));
7358     rmapt[cp] = 2;
7359     rmapa[cp] = globidx;
7360     cmapt[cp] = 2;
7361     cmapa[cp] = globidx;
7362     mptmp[cp] = PETSC_FALSE;
7363     cp++;
7364     if (mmdata->P_oth) {
7365       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7366       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7367       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7368       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7369       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7370       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7371       PetscCall(MatProductSetFill(mp[cp], product->fill));
7372       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7373       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7374       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7375       mp[cp]->product->api_user = product->api_user;
7376       PetscCall(MatProductSetFromOptions(mp[cp]));
7377       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7378       mptmp[cp] = PETSC_TRUE;
7379       cp++;
7380       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7381       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7382       PetscCall(MatProductSetFill(mp[cp], product->fill));
7383       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7384       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7385       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7386       mp[cp]->product->api_user = product->api_user;
7387       PetscCall(MatProductSetFromOptions(mp[cp]));
7388       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7389       rmapt[cp] = 2;
7390       rmapa[cp] = globidx;
7391       cmapt[cp] = 2;
7392       cmapa[cp] = P_oth_idx;
7393       mptmp[cp] = PETSC_FALSE;
7394       cp++;
7395     }
7396     break;
7397   default:
7398     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7399   }
7400   /* sanity check */
7401   if (size > 1)
7402     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7403 
7404   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7405   for (i = 0; i < cp; i++) {
7406     mmdata->mp[i]    = mp[i];
7407     mmdata->mptmp[i] = mptmp[i];
7408   }
7409   mmdata->cp             = cp;
7410   C->product->data       = mmdata;
7411   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7412   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7413 
7414   /* memory type */
7415   mmdata->mtype = PETSC_MEMTYPE_HOST;
7416   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7417   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7418   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7419   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7420   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7421   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7422 
7423   /* prepare coo coordinates for values insertion */
7424 
7425   /* count total nonzeros of those intermediate seqaij Mats
7426     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7427     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7428     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7429   */
7430   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7431     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7432     if (mptmp[cp]) continue;
7433     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7434       const PetscInt *rmap = rmapa[cp];
7435       const PetscInt  mr   = mp[cp]->rmap->n;
7436       const PetscInt  rs   = C->rmap->rstart;
7437       const PetscInt  re   = C->rmap->rend;
7438       const PetscInt *ii   = mm->i;
7439       for (i = 0; i < mr; i++) {
7440         const PetscInt gr = rmap[i];
7441         const PetscInt nz = ii[i + 1] - ii[i];
7442         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7443         else ncoo_oown += nz;                  /* this row is local */
7444       }
7445     } else ncoo_d += mm->nz;
7446   }
7447 
7448   /*
7449     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7450 
7451     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7452 
7453     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7454 
7455     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7456     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7457     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7458 
7459     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7460     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7461   */
7462   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7463   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7464 
7465   /* gather (i,j) of nonzeros inserted by remote procs */
7466   if (hasoffproc) {
7467     PetscSF  msf;
7468     PetscInt ncoo2, *coo_i2, *coo_j2;
7469 
7470     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7471     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7472     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7473 
7474     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7475       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7476       PetscInt   *idxoff = mmdata->off[cp];
7477       PetscInt   *idxown = mmdata->own[cp];
7478       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7479         const PetscInt *rmap = rmapa[cp];
7480         const PetscInt *cmap = cmapa[cp];
7481         const PetscInt *ii   = mm->i;
7482         PetscInt       *coi  = coo_i + ncoo_o;
7483         PetscInt       *coj  = coo_j + ncoo_o;
7484         const PetscInt  mr   = mp[cp]->rmap->n;
7485         const PetscInt  rs   = C->rmap->rstart;
7486         const PetscInt  re   = C->rmap->rend;
7487         const PetscInt  cs   = C->cmap->rstart;
7488         for (i = 0; i < mr; i++) {
7489           const PetscInt *jj = mm->j + ii[i];
7490           const PetscInt  gr = rmap[i];
7491           const PetscInt  nz = ii[i + 1] - ii[i];
7492           if (gr < rs || gr >= re) { /* this is an offproc row */
7493             for (j = ii[i]; j < ii[i + 1]; j++) {
7494               *coi++    = gr;
7495               *idxoff++ = j;
7496             }
7497             if (!cmapt[cp]) { /* already global */
7498               for (j = 0; j < nz; j++) *coj++ = jj[j];
7499             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7500               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7501             } else { /* offdiag */
7502               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7503             }
7504             ncoo_o += nz;
7505           } else { /* this is a local row */
7506             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7507           }
7508         }
7509       }
7510       mmdata->off[cp + 1] = idxoff;
7511       mmdata->own[cp + 1] = idxown;
7512     }
7513 
7514     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7515     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7516     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7517     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7518     ncoo = ncoo_d + ncoo_oown + ncoo2;
7519     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7520     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7521     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7522     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7523     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7524     PetscCall(PetscFree2(coo_i, coo_j));
7525     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7526     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7527     coo_i = coo_i2;
7528     coo_j = coo_j2;
7529   } else { /* no offproc values insertion */
7530     ncoo = ncoo_d;
7531     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7532 
7533     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7534     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7535     PetscCall(PetscSFSetUp(mmdata->sf));
7536   }
7537   mmdata->hasoffproc = hasoffproc;
7538 
7539   /* gather (i,j) of nonzeros inserted locally */
7540   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7541     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7542     PetscInt       *coi  = coo_i + ncoo_d;
7543     PetscInt       *coj  = coo_j + ncoo_d;
7544     const PetscInt *jj   = mm->j;
7545     const PetscInt *ii   = mm->i;
7546     const PetscInt *cmap = cmapa[cp];
7547     const PetscInt *rmap = rmapa[cp];
7548     const PetscInt  mr   = mp[cp]->rmap->n;
7549     const PetscInt  rs   = C->rmap->rstart;
7550     const PetscInt  re   = C->rmap->rend;
7551     const PetscInt  cs   = C->cmap->rstart;
7552 
7553     if (mptmp[cp]) continue;
7554     if (rmapt[cp] == 1) { /* consecutive rows */
7555       /* fill coo_i */
7556       for (i = 0; i < mr; i++) {
7557         const PetscInt gr = i + rs;
7558         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7559       }
7560       /* fill coo_j */
7561       if (!cmapt[cp]) { /* type-0, already global */
7562         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7563       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7564         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7565       } else {                                            /* type-2, local to global for sparse columns */
7566         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7567       }
7568       ncoo_d += mm->nz;
7569     } else if (rmapt[cp] == 2) { /* sparse rows */
7570       for (i = 0; i < mr; i++) {
7571         const PetscInt *jj = mm->j + ii[i];
7572         const PetscInt  gr = rmap[i];
7573         const PetscInt  nz = ii[i + 1] - ii[i];
7574         if (gr >= rs && gr < re) { /* local rows */
7575           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7576           if (!cmapt[cp]) { /* type-0, already global */
7577             for (j = 0; j < nz; j++) *coj++ = jj[j];
7578           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7579             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7580           } else { /* type-2, local to global for sparse columns */
7581             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7582           }
7583           ncoo_d += nz;
7584         }
7585       }
7586     }
7587   }
7588   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7589   PetscCall(ISDestroy(&glob));
7590   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7591   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7592   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7593   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7594 
7595   /* preallocate with COO data */
7596   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7597   PetscCall(PetscFree2(coo_i, coo_j));
7598   PetscFunctionReturn(PETSC_SUCCESS);
7599 }
7600 
7601 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7602 {
7603   Mat_Product *product = mat->product;
7604 #if defined(PETSC_HAVE_DEVICE)
7605   PetscBool match  = PETSC_FALSE;
7606   PetscBool usecpu = PETSC_FALSE;
7607 #else
7608   PetscBool match = PETSC_TRUE;
7609 #endif
7610 
7611   PetscFunctionBegin;
7612   MatCheckProduct(mat, 1);
7613 #if defined(PETSC_HAVE_DEVICE)
7614   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7615   if (match) { /* we can always fallback to the CPU if requested */
7616     switch (product->type) {
7617     case MATPRODUCT_AB:
7618       if (product->api_user) {
7619         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7620         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7621         PetscOptionsEnd();
7622       } else {
7623         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7624         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7625         PetscOptionsEnd();
7626       }
7627       break;
7628     case MATPRODUCT_AtB:
7629       if (product->api_user) {
7630         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7631         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7632         PetscOptionsEnd();
7633       } else {
7634         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7635         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7636         PetscOptionsEnd();
7637       }
7638       break;
7639     case MATPRODUCT_PtAP:
7640       if (product->api_user) {
7641         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7642         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7643         PetscOptionsEnd();
7644       } else {
7645         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7646         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7647         PetscOptionsEnd();
7648       }
7649       break;
7650     default:
7651       break;
7652     }
7653     match = (PetscBool)!usecpu;
7654   }
7655 #endif
7656   if (match) {
7657     switch (product->type) {
7658     case MATPRODUCT_AB:
7659     case MATPRODUCT_AtB:
7660     case MATPRODUCT_PtAP:
7661       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7662       break;
7663     default:
7664       break;
7665     }
7666   }
7667   /* fallback to MPIAIJ ops */
7668   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7669   PetscFunctionReturn(PETSC_SUCCESS);
7670 }
7671 
7672 /*
7673    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7674 
7675    n - the number of block indices in cc[]
7676    cc - the block indices (must be large enough to contain the indices)
7677 */
7678 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7679 {
7680   PetscInt        cnt = -1, nidx, j;
7681   const PetscInt *idx;
7682 
7683   PetscFunctionBegin;
7684   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7685   if (nidx) {
7686     cnt     = 0;
7687     cc[cnt] = idx[0] / bs;
7688     for (j = 1; j < nidx; j++) {
7689       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7690     }
7691   }
7692   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7693   *n = cnt + 1;
7694   PetscFunctionReturn(PETSC_SUCCESS);
7695 }
7696 
7697 /*
7698     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7699 
7700     ncollapsed - the number of block indices
7701     collapsed - the block indices (must be large enough to contain the indices)
7702 */
7703 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7704 {
7705   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7706 
7707   PetscFunctionBegin;
7708   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7709   for (i = start + 1; i < start + bs; i++) {
7710     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7711     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7712     cprevtmp = cprev;
7713     cprev    = merged;
7714     merged   = cprevtmp;
7715   }
7716   *ncollapsed = nprev;
7717   if (collapsed) *collapsed = cprev;
7718   PetscFunctionReturn(PETSC_SUCCESS);
7719 }
7720 
7721 /*
7722  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7723 
7724  Input Parameter:
7725  . Amat - matrix
7726  - symmetrize - make the result symmetric
7727  + scale - scale with diagonal
7728 
7729  Output Parameter:
7730  . a_Gmat - output scalar graph >= 0
7731 
7732 */
7733 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7734 {
7735   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7736   MPI_Comm  comm;
7737   Mat       Gmat;
7738   PetscBool ismpiaij, isseqaij;
7739   Mat       a, b, c;
7740   MatType   jtype;
7741 
7742   PetscFunctionBegin;
7743   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7744   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7745   PetscCall(MatGetSize(Amat, &MM, &NN));
7746   PetscCall(MatGetBlockSize(Amat, &bs));
7747   nloc = (Iend - Istart) / bs;
7748 
7749   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7750   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7751   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7752 
7753   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7754   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7755      implementation */
7756   if (bs > 1) {
7757     PetscCall(MatGetType(Amat, &jtype));
7758     PetscCall(MatCreate(comm, &Gmat));
7759     PetscCall(MatSetType(Gmat, jtype));
7760     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7761     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7762     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7763       PetscInt  *d_nnz, *o_nnz;
7764       MatScalar *aa, val, *AA;
7765       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7766       if (isseqaij) {
7767         a = Amat;
7768         b = NULL;
7769       } else {
7770         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7771         a             = d->A;
7772         b             = d->B;
7773       }
7774       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7775       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7776       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7777         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7778         const PetscInt *cols1, *cols2;
7779         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7780           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7781           nnz[brow / bs] = nc2 / bs;
7782           if (nc2 % bs) ok = 0;
7783           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7784           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7785             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7786             if (nc1 != nc2) ok = 0;
7787             else {
7788               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7789                 if (cols1[jj] != cols2[jj]) ok = 0;
7790                 if (cols1[jj] % bs != jj % bs) ok = 0;
7791               }
7792             }
7793             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7794           }
7795           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7796           if (!ok) {
7797             PetscCall(PetscFree2(d_nnz, o_nnz));
7798             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7799             goto old_bs;
7800           }
7801         }
7802       }
7803       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7804       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7805       PetscCall(PetscFree2(d_nnz, o_nnz));
7806       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7807       // diag
7808       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7809         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7810         ai               = aseq->i;
7811         n                = ai[brow + 1] - ai[brow];
7812         aj               = aseq->j + ai[brow];
7813         for (int k = 0; k < n; k += bs) {        // block columns
7814           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7815           val        = 0;
7816           if (index_size == 0) {
7817             for (int ii = 0; ii < bs; ii++) { // rows in block
7818               aa = aseq->a + ai[brow + ii] + k;
7819               for (int jj = 0; jj < bs; jj++) {         // columns in block
7820                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7821               }
7822             }
7823           } else {                                       // use (index,index) value if provided
7824             for (int iii = 0; iii < index_size; iii++) { // rows in block
7825               int ii = index[iii];
7826               aa     = aseq->a + ai[brow + ii] + k;
7827               for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
7828                 int jj = index[jjj];
7829                 val    = PetscAbs(PetscRealPart(aa[jj]));
7830               }
7831             }
7832           }
7833           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7834           AA[k / bs] = val;
7835         }
7836         grow = Istart / bs + brow / bs;
7837         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7838       }
7839       // off-diag
7840       if (ismpiaij) {
7841         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7842         const PetscScalar *vals;
7843         const PetscInt    *cols, *garray = aij->garray;
7844         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7845         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7846           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7847           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7848             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7849             AA[k / bs] = 0;
7850             AJ[cidx]   = garray[cols[k]] / bs;
7851           }
7852           nc = ncols / bs;
7853           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7854           if (index_size == 0) {
7855             for (int ii = 0; ii < bs; ii++) { // rows in block
7856               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7857               for (int k = 0; k < ncols; k += bs) {
7858                 for (int jj = 0; jj < bs; jj++) { // cols in block
7859                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7860                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7861                 }
7862               }
7863               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7864             }
7865           } else {                                       // use (index,index) value if provided
7866             for (int iii = 0; iii < index_size; iii++) { // rows in block
7867               int ii = index[iii];
7868               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7869               for (int k = 0; k < ncols; k += bs) {
7870                 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
7871                   int jj = index[jjj];
7872                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7873                 }
7874               }
7875               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7876             }
7877           }
7878           grow = Istart / bs + brow / bs;
7879           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7880         }
7881       }
7882       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7883       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7884       PetscCall(PetscFree2(AA, AJ));
7885     } else {
7886       const PetscScalar *vals;
7887       const PetscInt    *idx;
7888       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7889     old_bs:
7890       /*
7891        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7892        */
7893       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7894       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7895       if (isseqaij) {
7896         PetscInt max_d_nnz;
7897         /*
7898          Determine exact preallocation count for (sequential) scalar matrix
7899          */
7900         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7901         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7902         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7903         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7904         PetscCall(PetscFree3(w0, w1, w2));
7905       } else if (ismpiaij) {
7906         Mat             Daij, Oaij;
7907         const PetscInt *garray;
7908         PetscInt        max_d_nnz;
7909         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7910         /*
7911          Determine exact preallocation count for diagonal block portion of scalar matrix
7912          */
7913         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7914         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7915         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7916         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7917         PetscCall(PetscFree3(w0, w1, w2));
7918         /*
7919          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7920          */
7921         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7922           o_nnz[jj] = 0;
7923           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7924             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7925             o_nnz[jj] += ncols;
7926             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7927           }
7928           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7929         }
7930       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7931       /* get scalar copy (norms) of matrix */
7932       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7933       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7934       PetscCall(PetscFree2(d_nnz, o_nnz));
7935       for (Ii = Istart; Ii < Iend; Ii++) {
7936         PetscInt dest_row = Ii / bs;
7937         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7938         for (jj = 0; jj < ncols; jj++) {
7939           PetscInt    dest_col = idx[jj] / bs;
7940           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7941           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7942         }
7943         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7944       }
7945       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7946       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7947     }
7948   } else {
7949     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7950     else {
7951       Gmat = Amat;
7952       PetscCall(PetscObjectReference((PetscObject)Gmat));
7953     }
7954     if (isseqaij) {
7955       a = Gmat;
7956       b = NULL;
7957     } else {
7958       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7959       a             = d->A;
7960       b             = d->B;
7961     }
7962     if (filter >= 0 || scale) {
7963       /* take absolute value of each entry */
7964       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7965         MatInfo      info;
7966         PetscScalar *avals;
7967         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7968         PetscCall(MatSeqAIJGetArray(c, &avals));
7969         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7970         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7971       }
7972     }
7973   }
7974   if (symmetrize) {
7975     PetscBool isset, issym;
7976     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7977     if (!isset || !issym) {
7978       Mat matTrans;
7979       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7980       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7981       PetscCall(MatDestroy(&matTrans));
7982     }
7983     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7984   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7985   if (scale) {
7986     /* scale c for all diagonal values = 1 or -1 */
7987     Vec diag;
7988     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7989     PetscCall(MatGetDiagonal(Gmat, diag));
7990     PetscCall(VecReciprocal(diag));
7991     PetscCall(VecSqrtAbs(diag));
7992     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7993     PetscCall(VecDestroy(&diag));
7994   }
7995   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7996 
7997   if (filter >= 0) {
7998     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
7999     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8000   }
8001   *a_Gmat = Gmat;
8002   PetscFunctionReturn(PETSC_SUCCESS);
8003 }
8004 
8005 /*
8006     Special version for direct calls from Fortran
8007 */
8008 #include <petsc/private/fortranimpl.h>
8009 
8010 /* Change these macros so can be used in void function */
8011 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8012 #undef PetscCall
8013 #define PetscCall(...) \
8014   do { \
8015     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8016     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8017       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8018       return; \
8019     } \
8020   } while (0)
8021 
8022 #undef SETERRQ
8023 #define SETERRQ(comm, ierr, ...) \
8024   do { \
8025     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8026     return; \
8027   } while (0)
8028 
8029 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8030   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8031 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8032   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8033 #else
8034 #endif
8035 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8036 {
8037   Mat         mat = *mmat;
8038   PetscInt    m = *mm, n = *mn;
8039   InsertMode  addv = *maddv;
8040   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8041   PetscScalar value;
8042 
8043   MatCheckPreallocated(mat, 1);
8044   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8045   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8046   {
8047     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8048     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8049     PetscBool roworiented = aij->roworiented;
8050 
8051     /* Some Variables required in the macro */
8052     Mat         A     = aij->A;
8053     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8054     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8055     MatScalar  *aa;
8056     PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8057     Mat         B                 = aij->B;
8058     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8059     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8060     MatScalar  *ba;
8061     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8062      * cannot use "#if defined" inside a macro. */
8063     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8064 
8065     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8066     PetscInt   nonew = a->nonew;
8067     MatScalar *ap1, *ap2;
8068 
8069     PetscFunctionBegin;
8070     PetscCall(MatSeqAIJGetArray(A, &aa));
8071     PetscCall(MatSeqAIJGetArray(B, &ba));
8072     for (i = 0; i < m; i++) {
8073       if (im[i] < 0) continue;
8074       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8075       if (im[i] >= rstart && im[i] < rend) {
8076         row      = im[i] - rstart;
8077         lastcol1 = -1;
8078         rp1      = aj + ai[row];
8079         ap1      = aa + ai[row];
8080         rmax1    = aimax[row];
8081         nrow1    = ailen[row];
8082         low1     = 0;
8083         high1    = nrow1;
8084         lastcol2 = -1;
8085         rp2      = bj + bi[row];
8086         ap2      = ba + bi[row];
8087         rmax2    = bimax[row];
8088         nrow2    = bilen[row];
8089         low2     = 0;
8090         high2    = nrow2;
8091 
8092         for (j = 0; j < n; j++) {
8093           if (roworiented) value = v[i * n + j];
8094           else value = v[i + j * m];
8095           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8096           if (in[j] >= cstart && in[j] < cend) {
8097             col = in[j] - cstart;
8098             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8099           } else if (in[j] < 0) continue;
8100           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8101             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8102           } else {
8103             if (mat->was_assembled) {
8104               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8105 #if defined(PETSC_USE_CTABLE)
8106               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8107               col--;
8108 #else
8109               col = aij->colmap[in[j]] - 1;
8110 #endif
8111               if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
8112                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8113                 col = in[j];
8114                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8115                 B        = aij->B;
8116                 b        = (Mat_SeqAIJ *)B->data;
8117                 bimax    = b->imax;
8118                 bi       = b->i;
8119                 bilen    = b->ilen;
8120                 bj       = b->j;
8121                 rp2      = bj + bi[row];
8122                 ap2      = ba + bi[row];
8123                 rmax2    = bimax[row];
8124                 nrow2    = bilen[row];
8125                 low2     = 0;
8126                 high2    = nrow2;
8127                 bm       = aij->B->rmap->n;
8128                 ba       = b->a;
8129                 inserted = PETSC_FALSE;
8130               }
8131             } else col = in[j];
8132             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8133           }
8134         }
8135       } else if (!aij->donotstash) {
8136         if (roworiented) {
8137           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8138         } else {
8139           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8140         }
8141       }
8142     }
8143     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8144     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8145   }
8146   PetscFunctionReturnVoid();
8147 }
8148 
8149 /* Undefining these here since they were redefined from their original definition above! No
8150  * other PETSc functions should be defined past this point, as it is impossible to recover the
8151  * original definitions */
8152 #undef PetscCall
8153 #undef SETERRQ
8154