xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d8a51d2aac522cef49bfbd84fd4cf473dc3fa6ee)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
124   Developer Note:
125   Level: beginner
126 
    Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also
   automatically switches over to use inodes when enough of them exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166 
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
171 {
172   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
173 
174   PetscFunctionBegin;
175   if (mat->A) {
176     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
177     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
178   }
179   PetscFunctionReturn(PETSC_SUCCESS);
180 }
181 
182 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
183 {
184   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
185   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
186   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
187   const PetscInt  *ia, *ib;
188   const MatScalar *aa, *bb, *aav, *bav;
189   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
190   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
191 
192   PetscFunctionBegin;
193   *keptrows = NULL;
194 
195   ia = a->i;
196   ib = b->i;
197   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
198   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
199   for (i = 0; i < m; i++) {
200     na = ia[i + 1] - ia[i];
201     nb = ib[i + 1] - ib[i];
202     if (!na && !nb) {
203       cnt++;
204       goto ok1;
205     }
206     aa = aav + ia[i];
207     for (j = 0; j < na; j++) {
208       if (aa[j] != 0.0) goto ok1;
209     }
210     bb = PetscSafePointerPlusOffset(bav, ib[i]);
211     for (j = 0; j < nb; j++) {
212       if (bb[j] != 0.0) goto ok1;
213     }
214     cnt++;
215   ok1:;
216   }
217   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
218   if (!n0rows) {
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
220     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
221     PetscFunctionReturn(PETSC_SUCCESS);
222   }
223   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
224   cnt = 0;
225   for (i = 0; i < m; i++) {
226     na = ia[i + 1] - ia[i];
227     nb = ib[i + 1] - ib[i];
228     if (!na && !nb) continue;
229     aa = aav + ia[i];
230     for (j = 0; j < na; j++) {
231       if (aa[j] != 0.0) {
232         rows[cnt++] = rstart + i;
233         goto ok2;
234       }
235     }
236     bb = PetscSafePointerPlusOffset(bav, ib[i]);
237     for (j = 0; j < nb; j++) {
238       if (bb[j] != 0.0) {
239         rows[cnt++] = rstart + i;
240         goto ok2;
241       }
242     }
243   ok2:;
244   }
245   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
247   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
252 {
253   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
254   PetscBool   cong;
255 
256   PetscFunctionBegin;
257   PetscCall(MatHasCongruentLayouts(Y, &cong));
258   if (Y->assembled && cong) {
259     PetscCall(MatDiagonalSet(aij->A, D, is));
260   } else {
261     PetscCall(MatDiagonalSet_Default(Y, D, is));
262   }
263   PetscFunctionReturn(PETSC_SUCCESS);
264 }
265 
266 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
267 {
268   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
269   PetscInt    i, rstart, nrows, *rows;
270 
271   PetscFunctionBegin;
272   *zrows = NULL;
273   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
274   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
275   for (i = 0; i < nrows; i++) rows[i] += rstart;
276   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
/*
  Computes a per-column reduction of A into reductions[0..N-1] (global column count N):
  NORM_1, NORM_2, NORM_INFINITY, or the sum/mean of the real or imaginary parts.
  Each rank accumulates the contribution of its stored entries into a length-N work
  array indexed by *global* column, then the ranks combine with MPI_MAX (infinity
  norm) or MPI_SUM (everything else).
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  /* zero-initialized accumulator over all global columns */
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): these get/restore pairs appear to be called only for their side
     effect — presumably to make sure the host-side a_aij->a / b_aij->a arrays read
     below are up to date (e.g. after GPU assembly); confirm before removing */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* Diagonal-block columns are offset by cmap->rstart; off-diagonal columns are
     mapped to global indices through garray */
  if (type == NORM_2) {
    /* |a*a| equals |a|^2 for both real and complex scalars */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* Combine the per-rank contributions */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* Post-process: square root for the 2-norm, divide by the global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
326 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
327 {
328   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
329   IS              sis, gis;
330   const PetscInt *isis, *igis;
331   PetscInt        n, *iis, nsis, ngis, rstart, i;
332 
333   PetscFunctionBegin;
334   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
335   PetscCall(MatFindNonzeroRows(a->B, &gis));
336   PetscCall(ISGetSize(gis, &ngis));
337   PetscCall(ISGetSize(sis, &nsis));
338   PetscCall(ISGetIndices(sis, &isis));
339   PetscCall(ISGetIndices(gis, &igis));
340 
341   PetscCall(PetscMalloc1(ngis + nsis, &iis));
342   PetscCall(PetscArraycpy(iis, igis, ngis));
343   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
344   n = ngis + nsis;
345   PetscCall(PetscSortRemoveDupsInt(&n, iis));
346   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
347   for (i = 0; i < n; i++) iis[i] += rstart;
348   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
349 
350   PetscCall(ISRestoreIndices(sis, &isis));
351   PetscCall(ISRestoreIndices(gis, &igis));
352   PetscCall(ISDestroy(&sis));
353   PetscCall(ISDestroy(&gis));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each process
has an order-N integer array, but it is fast to access).
*/
364 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
365 {
366   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
367   PetscInt    n   = aij->B->cmap->n, i;
368 
369   PetscFunctionBegin;
370   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
371 #if defined(PETSC_USE_CTABLE)
372   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
373   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
374 #else
375   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
376   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
377 #endif
378   PetscFunctionReturn(PETSC_SUCCESS);
379 }
380 
/* Insert or add one value at local (row, col) of the diagonal block A.
   Relies on the caller (MatSetValues_MPIAIJ) having set up the per-row state:
   rp1/ap1 (column/value pointers), nrow1/rmax1 (used/allocated length),
   low1/high1/lastcol1 (search window), plus ailen/aimax/nonew/ignorezeroentries
   and the arrays aa/ai/aj.  orow/ocol are the global indices, used only in
   error messages. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
426 
/* Insert or add one value at (row, col) of the off-diagonal block B.
   Mirror of MatSetValues_SeqAIJ_A_Private using the rp2/ap2/nrow2/low2/high2/
   lastcol2/rmax2 state and bilen/bimax/ba/bi/bj.  Unlike the A variant there is
   no `row != col` test in the ignore-zero branch: off-diagonal entries are never
   on the matrix diagonal. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
471 
/*
  Replaces the numerical values of one locally owned row, given v[] holding values
  for ALL stored entries of that row ordered by global column: first the B entries
  left of the diagonal block, then the A (diagonal-block) entries, then the
  remaining B entries.  Sparsity pattern is unchanged.
  NOTE(review): the split of B's entries uses the row-ownership start as the column
  threshold, which matches the original comment that this "only works for square
  matrices A" — confirm callers guarantee that.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  /* count the leading B entries whose global column is below the diagonal block */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  /* copy the l values left of the diagonal block into B */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/*
  MatSetValues() implementation for MPIAIJ: inserts or adds the m-by-n block of
  values v at global rows im[] and columns in[].  Locally owned rows go straight
  into the diagonal (A) or off-diagonal (B) sequential block via the
  MatSetValues_SeqAIJ_{A,B}_Private macros; rows owned by other ranks are queued
  in the stash for communication at assembly time.  Negative row/column indices
  are silently skipped.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* row is owned by this rank */
      row      = im[i] - rstart;
      /* initialize the search state the A/B macros expect for this row */
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        /* v is logically m x n; roworiented selects row-major vs column-major access */
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column lands in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are ignored by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column numbering; translate via colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new nonzero while insertions are frozen: skip (nonew == 1) or error */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B is addressed by global column */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* row owned by another rank: stash for communication at assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
618 
619 /*
620     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
621     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
623 */
624 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
625 {
626   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
627   Mat         A      = aij->A; /* diagonal part of the matrix */
628   Mat         B      = aij->B; /* off-diagonal part of the matrix */
629   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
630   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
631   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
632   PetscInt   *ailen = a->ilen, *aj = a->j;
633   PetscInt   *bilen = b->ilen, *bj = b->j;
634   PetscInt    am          = aij->A->rmap->n, j;
635   PetscInt    diag_so_far = 0, dnz;
636   PetscInt    offd_so_far = 0, onz;
637 
638   PetscFunctionBegin;
639   /* Iterate over all rows of the matrix */
640   for (j = 0; j < am; j++) {
641     dnz = onz = 0;
642     /*  Iterate over all non-zero columns of the current row */
643     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
644       /* If column is in the diagonal */
645       if (mat_j[col] >= cstart && mat_j[col] < cend) {
646         aj[diag_so_far++] = mat_j[col] - cstart;
647         dnz++;
648       } else { /* off-diagonal entries */
649         bj[offd_so_far++] = mat_j[col];
650         onz++;
651       }
652     }
653     ailen[j] = dnz;
654     bilen[j] = onz;
655   }
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
659 /*
660     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
661     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
662     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
663     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
664     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
665 */
666 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
667 {
668   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
669   Mat          A    = aij->A; /* diagonal part of the matrix */
670   Mat          B    = aij->B; /* off-diagonal part of the matrix */
671   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
672   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
673   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
674   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
675   PetscInt    *ailen = a->ilen, *aj = a->j;
676   PetscInt    *bilen = b->ilen, *bj = b->j;
677   PetscInt     am          = aij->A->rmap->n, j;
678   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
679   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
680   PetscScalar *aa = a->a, *ba = b->a;
681 
682   PetscFunctionBegin;
683   /* Iterate over all rows of the matrix */
684   for (j = 0; j < am; j++) {
685     dnz_row = onz_row = 0;
686     rowstart_offd     = full_offd_i[j];
687     rowstart_diag     = full_diag_i[j];
688     /*  Iterate over all non-zero columns of the current row */
689     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
690       /* If column is in the diagonal */
691       if (mat_j[col] >= cstart && mat_j[col] < cend) {
692         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
693         aa[rowstart_diag + dnz_row] = mat_a[col];
694         dnz_row++;
695       } else { /* off-diagonal entries */
696         bj[rowstart_offd + onz_row] = mat_j[col];
697         ba[rowstart_offd + onz_row] = mat_a[col];
698         onz_row++;
699       }
700     }
701     ailen[j] = dnz_row;
702     bilen[j] = onz_row;
703   }
704   PetscFunctionReturn(PETSC_SUCCESS);
705 }
706 
/*
  MatGetValues_MPIAIJ - Retrieves the m x n logically dense block v of matrix
  entries for rows idxm[] and columns idxn[].

  Only rows owned by this rank may be requested (enforced below); columns may
  refer to any part of the matrix. Negative row or column indices are skipped,
  leaving the corresponding entries of v untouched.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        /* column lies in the diagonal block: read from aij->A using local indices */
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* off-diagonal column: translate the global column index to aij->B's local column via colmap */
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--; /* colmap stores local index + 1 so that 0 can signal "not present" */
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* columns absent from the off-diagonal nonzero pattern read as 0.0 */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
740 
741 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
742 {
743   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
744   PetscInt    nstash, reallocs;
745 
746   PetscFunctionBegin;
747   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
748 
749   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
750   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
751   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
752   PetscFunctionReturn(PETSC_SUCCESS);
753 }
754 
/*
  MatAssemblyEnd_MPIAIJ - Completes assembly of a parallel AIJ matrix:
  receives and inserts the off-process entries stashed on other ranks,
  assembles the diagonal (A) and off-diagonal (B) sequential blocks,
  performs collective disassembly of B if any rank requires it, builds the
  matrix-vector multiply machinery on the first final assembly, and updates
  the collective nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain every incoming stash message; flg becomes false when none remain */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    /* collective decision: LAND of was_assembled tells whether every rank is still assembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  /* NOTE(review): inode routines are disabled for the off-diagonal block here — presumably they do not pay off for its sparsity; confirm before changing */
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* scratch space used by MatGetRow is invalid after assembly; free it */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
833 
834 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
835 {
836   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
837 
838   PetscFunctionBegin;
839   PetscCall(MatZeroEntries(l->A));
840   PetscCall(MatZeroEntries(l->B));
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
/*
  MatZeroRows_MPIAIJ - Zeroes the given global rows, optionally placing 'diag'
  on the diagonal of each zeroed row; when x and b are both supplied, the
  right-hand side is fixed so that b = diag*x in those rows (requires
  congruent row/column layouts).

  rows[] may reference rows owned by any rank; MatZeroRowsMapLocal_Private
  maps them to the locally owned subset lrows[] of length len.
*/
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in mat->A, so delegate directly */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the blocks' nonew settings so they can be restored after the diagonal insertions */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    /* note: despite the names, nnzA/nnzB hold the keepnonzeropattern flags */
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* rows past the global column count have no diagonal entry to set */
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(mat->A->data))->keepnonzeropattern || !((Mat_SeqAIJ *)(mat->A->data))->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
914 
/*
  MatZeroRowsColumns_MPIAIJ - Zeroes the given global rows AND the matching
  columns, optionally placing 'diag' on the diagonal of each zeroed row/column;
  when x and b are supplied, b is adjusted so the zeroed unknowns keep the
  solution values in x (requires congruent row/column layouts).

  The requested rows may live on any rank; an SF reduction first maps them to
  the locally owned subset. Column zeroing of the off-diagonal block is done
  with a 0/1 mask vector scattered into ghost ordering.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* NOTE(review): n is reused below as a per-row nonzero count */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not requested"; any reduced-in row index is >= 0 */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a 0/1 mask over owned columns (1 = zeroed) and scatter it into ghost ordering */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* ghost values of x are needed to correct b for the zeroed off-diagonal columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row slots back to true local rows */
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* this ghost column was zeroed somewhere */
          if (b) bb[*ridx] -= *aa * xx[*aj]; /* move the known contribution to the RHS */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1032 
1033 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1034 {
1035   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1036   PetscInt    nt;
1037   VecScatter  Mvctx = a->Mvctx;
1038 
1039   PetscFunctionBegin;
1040   PetscCall(VecGetLocalSize(xx, &nt));
1041   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1042   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1043   PetscUseTypeMethod(a->A, mult, xx, yy);
1044   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1045   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1046   PetscFunctionReturn(PETSC_SUCCESS);
1047 }
1048 
1049 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1050 {
1051   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1052 
1053   PetscFunctionBegin;
1054   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1055   PetscFunctionReturn(PETSC_SUCCESS);
1056 }
1057 
1058 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1059 {
1060   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1061   VecScatter  Mvctx = a->Mvctx;
1062 
1063   PetscFunctionBegin;
1064   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1065   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1066   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1067   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1068   PetscFunctionReturn(PETSC_SUCCESS);
1069 }
1070 
1071 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1072 {
1073   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1074 
1075   PetscFunctionBegin;
1076   /* do nondiagonal part */
1077   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1078   /* do local part */
1079   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1080   /* add partial results together */
1081   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1083   PetscFunctionReturn(PETSC_SUCCESS);
1084 }
1085 
1086 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1087 {
1088   MPI_Comm    comm;
1089   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1090   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1091   IS          Me, Notme;
1092   PetscInt    M, N, first, last, *notme, i;
1093   PetscBool   lf;
1094   PetscMPIInt size;
1095 
1096   PetscFunctionBegin;
1097   /* Easy test: symmetric diagonal block */
1098   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1099   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1100   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1101   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1102   PetscCallMPI(MPI_Comm_size(comm, &size));
1103   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1104 
1105   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1106   PetscCall(MatGetSize(Amat, &M, &N));
1107   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1108   PetscCall(PetscMalloc1(N - last + first, &notme));
1109   for (i = 0; i < first; i++) notme[i] = i;
1110   for (i = last; i < M; i++) notme[i - last + first] = i;
1111   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1112   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1113   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1114   Aoff = Aoffs[0];
1115   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1116   Boff = Boffs[0];
1117   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1118   PetscCall(MatDestroyMatrices(1, &Aoffs));
1119   PetscCall(MatDestroyMatrices(1, &Boffs));
1120   PetscCall(ISDestroy(&Me));
1121   PetscCall(ISDestroy(&Notme));
1122   PetscCall(PetscFree(notme));
1123   PetscFunctionReturn(PETSC_SUCCESS);
1124 }
1125 
/*
  MatIsSymmetric_MPIAIJ - A matrix is symmetric exactly when it equals its own
  transpose, so delegate to the collective transpose test with A against itself.
*/
static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1132 
1133 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1134 {
1135   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1136 
1137   PetscFunctionBegin;
1138   /* do nondiagonal part */
1139   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1140   /* do local part */
1141   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1142   /* add partial results together */
1143   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1144   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1145   PetscFunctionReturn(PETSC_SUCCESS);
1146 }
1147 
1148 /*
1149   This only works correctly for square matrices where the subblock A->A is the
1150    diagonal block
1151 */
1152 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1153 {
1154   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1155 
1156   PetscFunctionBegin;
1157   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1158   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1159   PetscCall(MatGetDiagonal(a->A, v));
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 }
1162 
1163 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1164 {
1165   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1166 
1167   PetscFunctionBegin;
1168   PetscCall(MatScale(a->A, aa));
1169   PetscCall(MatScale(a->B, aa));
1170   PetscFunctionReturn(PETSC_SUCCESS);
1171 }
1172 
/*
  MatView_MPIAIJ_Binary - Writes the parallel matrix to a binary viewer in the
  standard PETSc format: header (classid, M, N, total nz), per-row lengths,
  global column indices, then values.

  For each local row the entries are emitted in globally ascending column
  order by interleaving the two blocks: off-diagonal entries whose global
  column precedes the diagonal block, then the diagonal block (columns
  shifted by cs), then the remaining off-diagonal entries.
*/
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  /* global nonzero count is accumulated in 64 bits and clipped to PetscInt for the header */
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) {
    if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
    else header[3] = (PetscInt)hnz;
  }
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths  */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    /* garray values lie outside the owned column range [cs,cend), so '> cs'
       detects the first off-diagonal column to the right of the diagonal block */
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values in the same interleaved order as the column indices */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1251 
1252 #include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - Viewer backend shared by the ASCII,
  draw, binary and socket paths of MatView_MPIAIJ().

  Info-style ASCII formats and binary output are handled (and return) up
  front; everything else falls through to the generic path that gathers the
  whole matrix onto rank 0 and views it there as a SeqAIJ matrix.
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* gather per-rank nonzero counts and report min/avg/max */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank local sizes, nonzero usage and inode information */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch is unreachable — iascii was already taken by the
       first arm of this if/else-if chain; the uniprocessor ASCII case instead
       falls through that arm to the gather-on-rank-0 code below. Confirm before removing. */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests every row/column; all other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1377 
1378 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1379 {
1380   PetscBool iascii, isdraw, issocket, isbinary;
1381 
1382   PetscFunctionBegin;
1383   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1384   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1385   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1386   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1387   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1388   PetscFunctionReturn(PETSC_SUCCESS);
1389 }
1390 
/*
  MatSOR_MPIAIJ - SOR/SSOR relaxation for an MPIAIJ matrix.

  Only the "local" variants are supported: each process relaxes its diagonal
  block A with the off-process coupling B*x folded into the right-hand side.
  A true parallel SOR (coupling within a sweep) is not implemented and
  errors out at the bottom.

  matin  - the MPIAIJ matrix (split as diagonal block A and off-diagonal block B)
  bb     - right-hand side
  omega  - relaxation factor
  flag   - bitwise OR of MatSORType flags selecting the sweep variant
  fshift - diagonal shift
  its    - number of outer (global) iterations
  lits   - number of local iterations passed to the sequential SOR
  xx     - solution vector (in/out)
*/
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application: delegate entirely to the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 (the modified rhs bb - B*x) is needed whenever more than one outer iteration
     runs, or the initial guess is nonzero, or the Eisenstat trick is requested.
     Note ~flag & SOR_ZERO_INITIAL_GUESS tests that the zero-initial-guess bit is NOT set. */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with x = 0: B*x vanishes, so relax directly on bb */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of x needed by the off-diagonal block */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    /* Eisenstat's trick: split the preconditioned operator so only one
       triangular solve per multiply is needed */
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the matrix diagonal for the pointwise scaling below */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/factorization error detected in the local solves */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1487 
/*
  MatPermute_MPIAIJ - builds B = P_r * A * P_c for row/column permutations given as index sets.

  The permutations rowp/colp give, for each local row/column, its destination
  global index. Two star forests are used: rowsf to invert the row permutation
  (and later to ship preallocation counts to the owning processes), and a
  temporary sf to invert the column permutation and translate the ghost-column
  map garray. The permuted matrix is then assembled with MatSetValues.
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* NOTE(review): never assigned below, so the conditional ISDestroy at the end is currently dead */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros per (destination) row for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Ship the counts to the processes that will own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1593 
1594 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1595 {
1596   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1597 
1598   PetscFunctionBegin;
1599   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1600   if (ghosts) *ghosts = aij->garray;
1601   PetscFunctionReturn(PETSC_SUCCESS);
1602 }
1603 
1604 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1605 {
1606   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1607   Mat            A = mat->A, B = mat->B;
1608   PetscLogDouble isend[5], irecv[5];
1609 
1610   PetscFunctionBegin;
1611   info->block_size = 1.0;
1612   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1613 
1614   isend[0] = info->nz_used;
1615   isend[1] = info->nz_allocated;
1616   isend[2] = info->nz_unneeded;
1617   isend[3] = info->memory;
1618   isend[4] = info->mallocs;
1619 
1620   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1621 
1622   isend[0] += info->nz_used;
1623   isend[1] += info->nz_allocated;
1624   isend[2] += info->nz_unneeded;
1625   isend[3] += info->memory;
1626   isend[4] += info->mallocs;
1627   if (flag == MAT_LOCAL) {
1628     info->nz_used      = isend[0];
1629     info->nz_allocated = isend[1];
1630     info->nz_unneeded  = isend[2];
1631     info->memory       = isend[3];
1632     info->mallocs      = isend[4];
1633   } else if (flag == MAT_GLOBAL_MAX) {
1634     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1635 
1636     info->nz_used      = irecv[0];
1637     info->nz_allocated = irecv[1];
1638     info->nz_unneeded  = irecv[2];
1639     info->memory       = irecv[3];
1640     info->mallocs      = irecv[4];
1641   } else if (flag == MAT_GLOBAL_SUM) {
1642     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1643 
1644     info->nz_used      = irecv[0];
1645     info->nz_allocated = irecv[1];
1646     info->nz_unneeded  = irecv[2];
1647     info->memory       = irecv[3];
1648     info->mallocs      = irecv[4];
1649   }
1650   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1651   info->fill_ratio_needed = 0;
1652   info->factor_mallocs    = 0;
1653   PetscFunctionReturn(PETSC_SUCCESS);
1654 }
1655 
/*
  MatSetOption_MPIAIJ - sets a matrix option, forwarding to the sequential
  diagonal (a->A) and off-diagonal (a->B) blocks where the option affects them.

  Options that only influence assembly bookkeeping are recorded in the
  Mat_MPIAIJ context; symmetry flags and MAT_STRUCTURE_ONLY are handled by the
  generic MatSetOption() and are accepted here as no-ops.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded locally (affects MatSetValues interpretation) and forwarded */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, values destined for other processes are dropped instead of stashed */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1709 
/*
  MatGetRow_MPIAIJ - returns one locally owned row with global column indices.

  The row lives partly in the diagonal block A (columns in [cstart, cend)) and
  partly in the off-diagonal block B (columns mapped through garray). Because
  both sequential rows are sorted, the merged output is produced in increasing
  global column order: B-columns below cstart, then all A-columns, then the
  remaining B-columns. Results point into per-matrix work arrays, so only one
  row may be "gotten" at a time (enforced via getrowactive) and must be
  released with MatRestoreRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column is below cstart */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1793 
1794 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1795 {
1796   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1797 
1798   PetscFunctionBegin;
1799   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1800   aij->getrowactive = PETSC_FALSE;
1801   PetscFunctionReturn(PETSC_SUCCESS);
1802 }
1803 
/*
  MatNorm_MPIAIJ - computes the Frobenius, one, or infinity norm of an MPIAIJ matrix.

  On one process the computation is delegated to the sequential diagonal block.
  Otherwise the local contributions of the diagonal (A) and off-diagonal (B)
  blocks are combined and reduced over the communicator:
    NORM_FROBENIUS - sum of |a_ij|^2, allreduced with SUM, then sqrt
    NORM_1         - per-(global)-column absolute sums, allreduced with SUM, then max
    NORM_INFINITY  - per-row absolute sums, allreduced with MAX
  The two-norm is not supported.
*/
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* accumulate |a_ij|^2 over both blocks */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      /* tmp holds one partial sum per GLOBAL column; O(N) storage per process */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        /* A stores local column indices; shift by cstart to get the global column */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* B stores compressed indices; garray maps them to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        /* PetscSafePointerPlusOffset guards against a NULL base when the block is empty */
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1881 
/*
  MatTranspose_MPIAIJ - computes B = A^T for an MPIAIJ matrix.

  For MAT_INITIAL_MATRIX (or in-place transpose) the result is preallocated by
  counting, per column of A, the local (d_nnz) and off-process (o_nnz)
  contributions; the off-process counts are summed onto their owners with a
  star forest. The diagonal block is transposed directly (all writes local);
  the off-diagonal block is inserted via MatSetValues with transposed row/column
  roles, which communicates the entries to the owning processes.
  In-place (reuse == MAT_INPLACE_MATRIX, i.e. neither INITIAL nor REUSE)
  finishes with MatHeaderMerge.
*/
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B has the transposed layout: local sizes (n, m), global sizes (N, M) */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    /* reusing a matrix: any new nonzero would indicate a pattern mismatch */
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed column indices of B to global columns of A (= global rows of B^T) */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    /* each local row of A becomes one global column of B: insert ncol entries as a column */
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's innards with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1975 
1976 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1977 {
1978   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1979   Mat         a = aij->A, b = aij->B;
1980   PetscInt    s1, s2, s3;
1981 
1982   PetscFunctionBegin;
1983   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1984   if (rr) {
1985     PetscCall(VecGetLocalSize(rr, &s1));
1986     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1987     /* Overlap communication with computation. */
1988     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1989   }
1990   if (ll) {
1991     PetscCall(VecGetLocalSize(ll, &s1));
1992     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1993     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1994   }
1995   /* scale  the diagonal block */
1996   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1997 
1998   if (rr) {
1999     /* Do a scatter end and then right scale the off-diagonal block */
2000     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2001     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2002   }
2003   PetscFunctionReturn(PETSC_SUCCESS);
2004 }
2005 
2006 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2007 {
2008   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2009 
2010   PetscFunctionBegin;
2011   PetscCall(MatSetUnfactored(a->A));
2012   PetscFunctionReturn(PETSC_SUCCESS);
2013 }
2014 
2015 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2016 {
2017   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2018   Mat         a, b, c, d;
2019   PetscBool   flg;
2020 
2021   PetscFunctionBegin;
2022   a = matA->A;
2023   b = matA->B;
2024   c = matB->A;
2025   d = matB->B;
2026 
2027   PetscCall(MatEqual(a, c, &flg));
2028   if (flg) PetscCall(MatEqual(b, d, &flg));
2029   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2030   PetscFunctionReturn(PETSC_SUCCESS);
2031 }
2032 
2033 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2034 {
2035   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2036   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2037 
2038   PetscFunctionBegin;
2039   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2040   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2041     /* because of the column compression in the off-processor part of the matrix a->B,
2042        the number of columns in a->B and b->B may be different, hence we cannot call
2043        the MatCopy() directly on the two parts. If need be, we can provide a more
2044        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2045        then copying the submatrices */
2046     PetscCall(MatCopy_Basic(A, B, str));
2047   } else {
2048     PetscCall(MatCopy(a->A, b->A, str));
2049     PetscCall(MatCopy(a->B, b->B, str));
2050   }
2051   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2052   PetscFunctionReturn(PETSC_SUCCESS);
2053 }
2054 
2055 /*
2056    Computes the number of nonzeros per row needed for preallocation when X and Y
2057    have different nonzero structure.
2058 */
2059 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2060 {
2061   PetscInt i, j, k, nzx, nzy;
2062 
2063   PetscFunctionBegin;
2064   /* Set the number of nonzeros in the new matrix */
2065   for (i = 0; i < m; i++) {
2066     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2067     nzx    = xi[i + 1] - xi[i];
2068     nzy    = yi[i + 1] - yi[i];
2069     nnz[i] = 0;
2070     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2071       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2072       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2073       nnz[i]++;
2074     }
2075     for (; k < nzy; k++) nnz[i]++;
2076   }
2077   PetscFunctionReturn(PETSC_SUCCESS);
2078 }
2079 
2080 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2081 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2082 {
2083   PetscInt    m = Y->rmap->N;
2084   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2085   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2086 
2087   PetscFunctionBegin;
2088   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2089   PetscFunctionReturn(PETSC_SUCCESS);
2090 }
2091 
/*
  MatAXPY_MPIAIJ - computes Y = a*X + Y.

  SAME_NONZERO_PATTERN operates block-by-block; SUBSET_NONZERO_PATTERN falls
  back to the generic implementation; otherwise a new matrix with the union
  nonzero pattern is preallocated, filled, and swapped into Y via
  MatHeaderMerge.
*/
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* preallocate B with the union pattern of X and Y, split into diagonal
       and off-diagonal counts (the off-diagonal columns must be compared in
       global numbering via garray) */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's guts with B's so callers keep their handle to Y */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2122 
2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2124 
2125 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2126 {
2127   PetscFunctionBegin;
2128   if (PetscDefined(USE_COMPLEX)) {
2129     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2130 
2131     PetscCall(MatConjugate_SeqAIJ(aij->A));
2132     PetscCall(MatConjugate_SeqAIJ(aij->B));
2133   }
2134   PetscFunctionReturn(PETSC_SUCCESS);
2135 }
2136 
2137 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2140 
2141   PetscFunctionBegin;
2142   PetscCall(MatRealPart(a->A));
2143   PetscCall(MatRealPart(a->B));
2144   PetscFunctionReturn(PETSC_SUCCESS);
2145 }
2146 
2147 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2148 {
2149   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2150 
2151   PetscFunctionBegin;
2152   PetscCall(MatImaginaryPart(a->A));
2153   PetscCall(MatImaginaryPart(a->B));
2154   PetscFunctionReturn(PETSC_SUCCESS);
2155 }
2156 
2157 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2158 {
2159   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2160   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2161   PetscScalar       *va, *vv;
2162   Vec                vB, vA;
2163   const PetscScalar *vb;
2164 
2165   PetscFunctionBegin;
2166   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2167   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2168 
2169   PetscCall(VecGetArrayWrite(vA, &va));
2170   if (idx) {
2171     for (i = 0; i < m; i++) {
2172       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2173     }
2174   }
2175 
2176   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2177   PetscCall(PetscMalloc1(m, &idxb));
2178   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2179 
2180   PetscCall(VecGetArrayWrite(v, &vv));
2181   PetscCall(VecGetArrayRead(vB, &vb));
2182   for (i = 0; i < m; i++) {
2183     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2184       vv[i] = vb[i];
2185       if (idx) idx[i] = a->garray[idxb[i]];
2186     } else {
2187       vv[i] = va[i];
2188       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2189     }
2190   }
2191   PetscCall(VecRestoreArrayWrite(vA, &vv));
2192   PetscCall(VecRestoreArrayWrite(vA, &va));
2193   PetscCall(VecRestoreArrayRead(vB, &vb));
2194   PetscCall(PetscFree(idxb));
2195   PetscCall(VecDestroy(&vA));
2196   PetscCall(VecDestroy(&vB));
2197   PetscFunctionReturn(PETSC_SUCCESS);
2198 }
2199 
2200 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2201 {
2202   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2203   PetscInt    m = A->rmap->n;
2204   Vec         vB, vA;
2205 
2206   PetscFunctionBegin;
2207   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2208   PetscCall(MatGetRowSumAbs(a->A, vA));
2209   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2210   PetscCall(MatGetRowSumAbs(a->B, vB));
2211   PetscCall(VecAXPY(vA, 1.0, vB));
2212   PetscCall(VecDestroy(&vB));
2213   PetscCall(VecCopy(vA, v));
2214   PetscCall(VecDestroy(&vA));
2215   PetscFunctionReturn(PETSC_SUCCESS);
2216 }
2217 
/* Compute v[r] = min_j |A(r,j)| over the entire global row r; if idx[] is non-NULL it
   receives the global column index of a minimizing entry. The minimum combines the
   diagonal block mat->A, the stored entries of the off-diagonal block mat->B, and the
   implicit 0.0 entries at global columns not stored in B. */
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column index -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local, so the diagonal block holds the whole row: delegate,
       writing directly into v's array through a wrapper Vec */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No locally owned columns: report 0.0 with idx = -1.
       NOTE(review): stored off-diagonal entries are not inspected here; this appears to
       assume the implicit zeros dominate the min-abs -- confirm for fully dense B rows */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so the minimum absolute value over the off-diagonal part is an implicit 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap, i.e. the global column of the first implicit 0.0.
         NOTE(review): these comparisons mix global column values with the loop index j
         and rely on cmap[] being strictly increasing -- confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of row r of B, keeping the smallest |value|;
       ba/bj advance through B's CSR arrays across rows */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2333 
/* Compute v[r] = min_j A(r,j) (real-part comparison) over the entire global row r; if
   idx[] is non-NULL it receives the global column index of a minimizing entry. Combines
   the diagonal block mat->A, the stored entries of the off-diagonal block mat->B, and
   the implicit 0.0 entries at global columns not stored in B. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column index -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate to the diagonal block, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No locally owned columns: report the "empty row" identity PETSC_MAX_REAL with
       idx = -1. NOTE(review): stored off-diagonal entries are not inspected here --
       confirm this configuration implies empty local rows */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so we already KNOW the minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap, i.e. the global column of the first implicit 0.0.
         NOTE(review): these comparisons mix global column values with the loop index j
         and rely on cmap[] being strictly increasing -- confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of row r of B, keeping the smallest (real-part) value;
       ba/bj advance through B's CSR arrays across rows */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2449 
/* Compute v[r] = max_j A(r,j) (real-part comparison) over the entire global row r; if
   idx[] is non-NULL it receives the global column index of a maximizing entry. Combines
   the diagonal block mat->A, the stored entries of the off-diagonal block mat->B, and
   the implicit 0.0 entries at global columns not stored in B. */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column index -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate to the diagonal block, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No locally owned columns: report the "empty row" identity PETSC_MIN_REAL with
       idx = -1. NOTE(review): stored off-diagonal entries are not inspected here --
       confirm this configuration implies empty local rows */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap, i.e. the global column of the first implicit 0.0.
         NOTE(review): these comparisons mix global column values with the loop index j
         and rely on cmap[] being strictly increasing -- confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of row r of B, keeping the largest (real-part) value;
       ba/bj advance through B's CSR arrays across rows */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2565 
2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2567 {
2568   Mat *dummy;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2572   *newmat = *dummy;
2573   PetscCall(PetscFree(dummy));
2574   PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576 
2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2578 {
2579   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2580 
2581   PetscFunctionBegin;
2582   PetscCall(MatInvertBlockDiagonal(a->A, values));
2583   A->factorerrortype = a->A->factorerrortype;
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2588 {
2589   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2590 
2591   PetscFunctionBegin;
2592   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2593   PetscCall(MatSetRandom(aij->A, rctx));
2594   if (x->assembled) {
2595     PetscCall(MatSetRandom(aij->B, rctx));
2596   } else {
2597     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2598   }
2599   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2600   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2601   PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603 
2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2605 {
2606   PetscFunctionBegin;
2607   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2608   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2609   PetscFunctionReturn(PETSC_SUCCESS);
2610 }
2611 
2612 /*@
2613   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2614 
2615   Not Collective
2616 
2617   Input Parameter:
2618 . A - the matrix
2619 
2620   Output Parameter:
2621 . nz - the number of nonzeros
2622 
2623   Level: advanced
2624 
2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2626 @*/
2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2628 {
2629   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2630   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2631   PetscBool   isaij;
2632 
2633   PetscFunctionBegin;
2634   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2635   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2636   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2637   PetscFunctionReturn(PETSC_SUCCESS);
2638 }
2639 
2640 /*@
2641   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2642 
2643   Collective
2644 
2645   Input Parameters:
2646 + A  - the matrix
2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2648 
2649   Level: advanced
2650 
2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2652 @*/
2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2654 {
2655   PetscFunctionBegin;
2656   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2657   PetscFunctionReturn(PETSC_SUCCESS);
2658 }
2659 
2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2661 {
2662   PetscBool sc = PETSC_FALSE, flg;
2663 
2664   PetscFunctionBegin;
2665   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2666   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2667   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2668   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2669   PetscOptionsHeadEnd();
2670   PetscFunctionReturn(PETSC_SUCCESS);
2671 }
2672 
2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2674 {
2675   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2676   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2677 
2678   PetscFunctionBegin;
2679   if (!Y->preallocated) {
2680     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2681   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2682     PetscInt nonew = aij->nonew;
2683     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2684     aij->nonew = nonew;
2685   }
2686   PetscCall(MatShift_Basic(Y, a));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2696   PetscCall(MatMissingDiagonal(a->A, missing, d));
2697   if (d) {
2698     PetscInt rstart;
2699     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2700     *d += rstart;
2701   }
2702   PetscFunctionReturn(PETSC_SUCCESS);
2703 }
2704 
2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2706 {
2707   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2708 
2709   PetscFunctionBegin;
2710   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2711   PetscFunctionReturn(PETSC_SUCCESS);
2712 }
2713 
2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2715 {
2716   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2717 
2718   PetscFunctionBegin;
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2720   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
/* Operation dispatch table for MATMPIAIJ. Entries are positional (the /*NN*/ comments
   give the slot index); NULL means the operation is unsupported or inherited. Do not
   reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ};
2877 
2878 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2879 {
2880   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2881 
2882   PetscFunctionBegin;
2883   PetscCall(MatStoreValues(aij->A));
2884   PetscCall(MatStoreValues(aij->B));
2885   PetscFunctionReturn(PETSC_SUCCESS);
2886 }
2887 
2888 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2889 {
2890   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2891 
2892   PetscFunctionBegin;
2893   PetscCall(MatRetrieveValues(aij->A));
2894   PetscCall(MatRetrieveValues(aij->B));
2895   PetscFunctionReturn(PETSC_SUCCESS);
2896 }
2897 
/*
  Preallocates the diagonal (A) and off-diagonal (B) sequential blocks of an MPIAIJ matrix.

  Input Parameters:
+ B     - the MPIAIJ matrix
. d_nz  - nonzeros per row for the diagonal block
. d_nnz - optional per-row nonzero counts for the diagonal block (may be NULL)
. o_nz  - nonzeros per row for the off-diagonal block
- o_nnz - optional per-row nonzero counts for the off-diagonal block (may be NULL)

  The counts are forwarded unchanged to MatSeqAIJSetPreallocation() on each block.
  Both blocks are destroyed and recreated, so any previous nonzero structure is lost.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* matrix was in hash-based (unpreallocated) insertion mode; restore the saved ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* discard auxiliary state tied to the previous nonzero structure */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* recreate the off-diagonal block; on a single process it gets zero columns
     (the save/restore macros preserve user-set SeqAIJ options across the destroy) */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  /* recreate the diagonal block with B's local row and column sizes */
  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2945 
2946 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2947 {
2948   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2949 
2950   PetscFunctionBegin;
2951   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2952   PetscCall(PetscLayoutSetUp(B->rmap));
2953   PetscCall(PetscLayoutSetUp(B->cmap));
2954 
2955 #if defined(PETSC_USE_CTABLE)
2956   PetscCall(PetscHMapIDestroy(&b->colmap));
2957 #else
2958   PetscCall(PetscFree(b->colmap));
2959 #endif
2960   PetscCall(PetscFree(b->garray));
2961   PetscCall(VecDestroy(&b->lvec));
2962   PetscCall(VecScatterDestroy(&b->Mvctx));
2963 
2964   PetscCall(MatResetPreallocation(b->A));
2965   PetscCall(MatResetPreallocation(b->B));
2966   B->preallocated  = PETSC_TRUE;
2967   B->was_assembled = PETSC_FALSE;
2968   B->assembled     = PETSC_FALSE;
2969   PetscFunctionReturn(PETSC_SUCCESS);
2970 }
2971 
/*
  Duplicates an MPIAIJ matrix.

  Input Parameters:
+ matin    - the matrix to duplicate
- cpvalues - duplicate option, forwarded to MatDuplicate() of the sequential blocks

  Output Parameter:
. newmat - the new matrix

  Copies the diagonal (A) and off-diagonal (B) sequential blocks plus the
  auxiliary structures (colmap, garray, lvec, Mvctx) when they exist.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* MatGetRow() work arrays are not copied; rebuilt on demand */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* source is still in hash-based insertion mode: nothing more to copy than the setup */
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      /* dense colmap is a global-column-sized array; deep copy it */
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n; /* garray has one entry per off-diagonal column */
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  /* carry over composed functions (e.g. MatStoreValues_C) to the duplicate */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3030 
3031 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3032 {
3033   PetscBool isbinary, ishdf5;
3034 
3035   PetscFunctionBegin;
3036   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3037   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3038   /* force binary viewer to load .info file if it has not yet done so */
3039   PetscCall(PetscViewerSetUp(viewer));
3040   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3041   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3042   if (isbinary) {
3043     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3044   } else if (ishdf5) {
3045 #if defined(PETSC_HAVE_HDF5)
3046     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3047 #else
3048     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3049 #endif
3050   } else {
3051     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3052   }
3053   PetscFunctionReturn(PETSC_SUCCESS);
3054 }
3055 
/*
  Reads an MPIAIJ matrix from a PETSc binary viewer.

  The file contains a 4-entry header (classid, M, N, nz), followed by the
  per-row nonzero counts, then all column indices, then all values; the
  counts/indices/values are read collectively across the communicator.
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already (negative means still undetermined) */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the per-row lengths into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_MAX_INT) { /* PETSC_MAX_INT presumably marks an unknown total; skip the check then */
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3108 
3109 /* Not scalable because of ISAllGather() unless getting all columns. */
3110 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3111 {
3112   IS          iscol_local;
3113   PetscBool   isstride;
3114   PetscMPIInt lisstride = 0, gisstride;
3115 
3116   PetscFunctionBegin;
3117   /* check if we are grabbing all columns*/
3118   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3119 
3120   if (isstride) {
3121     PetscInt start, len, mstart, mlen;
3122     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3123     PetscCall(ISGetLocalSize(iscol, &len));
3124     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3125     if (mstart == start && mlen - mstart == len) lisstride = 1;
3126   }
3127 
3128   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3129   if (gisstride) {
3130     PetscInt N;
3131     PetscCall(MatGetSize(mat, NULL, &N));
3132     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3133     PetscCall(ISSetIdentity(iscol_local));
3134     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3135   } else {
3136     PetscInt cbs;
3137     PetscCall(ISGetBlockSize(iscol, &cbs));
3138     PetscCall(ISAllGather(iscol, &iscol_local));
3139     PetscCall(ISSetBlockSize(iscol_local, cbs));
3140   }
3141 
3142   *isseq = iscol_local;
3143   PetscFunctionReturn(PETSC_SUCCESS);
3144 }
3145 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
+   mat - matrix
.   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
           i.e., mat->rstart <= isrow[i] < mat->rend
-   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
           i.e., mat->cstart <= iscol[i] < mat->cend

 Output Parameters:
+   isrow_d - sequential row index set for retrieving mat->A
.   iscol_d - sequential column index set for retrieving mat->A
.   iscol_o - sequential column index set for retrieving mat->B
-   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
            (allocated here; caller takes ownership and must PetscFree() it)
 */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum gives the global offset of this rank's slice of iscol */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i]; /* mark column as selected */
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d (takes ownership of idx); keep iscol's block size */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: shift global row indices to local numbering */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries that stayed at -1 were not selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 passes to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3259 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/*
  On MAT_INITIAL_MATRIX the index sets isrow_d/iscol_d/iscol_o built here are
  composed onto the submatrix so a later MAT_REUSE_MATRIX call can retrieve
  them and update the submatrix in place.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M (Asub and Bsub are consumed by this call) */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* walk the two (sorted) garrays in lockstep, keeping only columns that survived assembly */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3352 
/*
  Extracts a parallel submatrix mat[isrow, iscol].

  Dispatches to specialized implementations when the index sets share the
  matrix's processor distribution (detected collectively below), and falls
  back to the nonscalable ISAllGather()-based path otherwise.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed objects record which specialized path built *newmat originally */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices fall inside this rank's row ownership range */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the choice must be collective: use logical AND across all ranks */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered IS on the submatrix for a later MAT_REUSE_MATRIX call */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3452 
/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's local column indices to global ones, in place */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew wraps B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer array ownership from B to Bnew so MatDestroy(&B) leaves the shared arrays alive */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3547 
3548 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3549 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - Extract the parallel submatrix mat[isrow,iscol] keeping
   each requested row on the rank that already owns it (the result's row distribution follows isrow
   rank-by-rank; no rows migrate between processes).

   Collective.

   Input Parameters:
+  mat         - the original MATMPIAIJ matrix
.  isrow       - rows this rank wants in the submatrix
.  iscol       - columns wanted (parallel IS)
.  iscol_local - sequential IS with the concatenated column indices; the non-allcolumns branch
                 below requires it to be sorted (duplicates are allowed)
-  call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  newmat - the parallel submatrix

   Notes:
   For MAT_INITIAL_MATRIX the intermediate sequential matrix and index sets are composed on the
   result under the names "SubMatrix", "SubIScol", "Subcmap" (and "ISAllGather") so that a later
   MAT_REUSE_MATRIX call can recover them; on reuse all of them must be present or an error is raised.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the helper objects stashed on *newmat by the earlier MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    /* Refill the sequential submatrix with the current numerical values of mat */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns; the flag must
       agree on every rank, hence the allreduce with logical AND */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      /* all columns requested: reuse iscol_local directly and map columns with the identity */
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0; /* walk position in garray; advances monotonically because is_idx is sorted */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: keep j only if it appears in garray (a column mat actually touches) */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      /* idx and cmap1 ownership passes to the index sets (PETSC_OWN_POINTER) */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread Ncols columns as evenly as possible; low ranks absorb the remainder */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of the local column counts gives this rank's column range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* single allocation: first m entries diagonal counts, next m off-diagonal */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* cmap translates Msub's local column into the submatrix's global column */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* local -> submatrix-global columns */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    /* composing transfers a reference; the local handles are then released so the
       composed objects are destroyed together with *newmat */
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3759 
/*
    Not great since it makes two copies of the submatrix, first a SeqAIJ
  locally and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices: every rank passes the
  full (sequential) column index set, which is why this path is "nonscalable".
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns.
     The decision must be unanimous across ranks, hence the logical-AND allreduce. */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix composed on *newmat by the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* distribute n columns as evenly as possible; low ranks take the remainder */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum gives this rank's owned column range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* one allocation: diagonal counts first, off-diagonal counts after */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request; composing takes a reference,
     so destroying the local handle leaves Mreuse alive, owned by M */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3893 
/*
  MatMPIAIJSetPreallocationCSR_MPIAIJ - MATMPIAIJ implementation behind MatMPIAIJSetPreallocationCSR():
  computes per-row diagonal/off-diagonal nonzero counts from the local CSR arrays (Ii, J),
  preallocates, inserts the values (v may be NULL), assembles, and records the number of
  entries left of the diagonal block per row in Aij->ld.

  Ii has rmap->n + 1 entries with Ii[0] == 0; J holds global column indices.
*/
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart; /* [cstart,cend) are the columns of the local diagonal block */
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* sanity-check each row's extent; NOTE(review): the JJ[nnz-1] bound check only catches all
       out-of-range columns when each row's indices are sorted — confirm caller contract */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count, for every row, how many entries land in the diagonal block vs the off-diagonal block */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
    nnz_max = PetscMax(nnz_max, nnz); /* NOTE(review): nnz_max is computed but unused below */
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* insert the values row by row; v == NULL inserts structure only (handled by the offset helper) */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
  }
  /* all entries are local here, so skip the off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  /* NOTE(review): the scan stops at the first J entry >= cstart, so ld[i] is only the count of
     columns < cstart when each row's indices are sorted — assumed from the caller, confirm */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3963 
3964 /*@
3965   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3966   (the default parallel PETSc format).
3967 
3968   Collective
3969 
3970   Input Parameters:
3971 + B - the matrix
3972 . i - the indices into `j` for the start of each local row (indices start with zero)
3973 . j - the column indices for each local row (indices start with zero)
3974 - v - optional values in the matrix
3975 
3976   Level: developer
3977 
3978   Notes:
3979   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3980   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3981   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3982 
3983   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3984 
3985   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3986 
3987   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3988 
3989   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3990   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3991 
3992   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
3994   as shown
3995 .vb
3996         1 0 0
3997         2 0 3     P0
3998        -------
3999         4 5 6     P1
4000 
4001      Process0 [P0] rows_owned=[0,1]
4002         i =  {0,1,3}  [size = nrow+1  = 2+1]
4003         j =  {0,0,2}  [size = 3]
4004         v =  {1,2,3}  [size = 3]
4005 
4006      Process1 [P1] rows_owned=[2]
4007         i =  {0,3}    [size = nrow+1  = 1+1]
4008         j =  {0,1,2}  [size = 3]
4009         v =  {4,5,6}  [size = 3]
4010 .ve
4011 
4012 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4013           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4014 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod is a silent no-op for matrix types that do not compose this method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4021 
4022 /*@C
4023   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4024   (the default parallel PETSc format).  For good matrix assembly performance
4025   the user should preallocate the matrix storage by setting the parameters
4026   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4027 
4028   Collective
4029 
4030   Input Parameters:
4031 + B     - the matrix
4032 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4033            (same value is used for all local rows)
4034 . d_nnz - array containing the number of nonzeros in the various rows of the
4035            DIAGONAL portion of the local submatrix (possibly different for each row)
4036            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4037            The size of this array is equal to the number of local rows, i.e 'm'.
4038            For matrices that will be factored, you must leave room for (and set)
4039            the diagonal entry even if it is zero.
4040 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4041            submatrix (same value is used for all local rows).
4042 - o_nnz - array containing the number of nonzeros in the various rows of the
4043            OFF-DIAGONAL portion of the local submatrix (possibly different for
4044            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4045            structure. The size of this array is equal to the number
4046            of local rows, i.e 'm'.
4047 
4048   Example Usage:
4049   Consider the following 8x8 matrix with 34 non-zero values, that is
4050   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4051   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4052   as follows
4053 
4054 .vb
4055             1  2  0  |  0  3  0  |  0  4
4056     Proc0   0  5  6  |  7  0  0  |  8  0
4057             9  0 10  | 11  0  0  | 12  0
4058     -------------------------------------
4059            13  0 14  | 15 16 17  |  0  0
4060     Proc1   0 18  0  | 19 20 21  |  0  0
4061             0  0  0  | 22 23  0  | 24  0
4062     -------------------------------------
4063     Proc2  25 26 27  |  0  0 28  | 29  0
4064            30  0  0  | 31 32 33  |  0 34
4065 .ve
4066 
4067   This can be represented as a collection of submatrices as
4068 .vb
4069       A B C
4070       D E F
4071       G H I
4072 .ve
4073 
4074   Where the submatrices A,B,C are owned by proc0, D,E,F are
4075   owned by proc1, G,H,I are owned by proc2.
4076 
4077   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4078   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4079   The 'M','N' parameters are 8,8, and have the same values on all procs.
4080 
4081   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4082   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4083   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4084   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4085   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4087 
4088   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4089   allocated for every row of the local diagonal submatrix, and `o_nz`
4090   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per local
  row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4093   In this case, the values of `d_nz`, `o_nz` are
4094 .vb
4095      proc0  dnz = 2, o_nz = 2
4096      proc1  dnz = 3, o_nz = 2
4097      proc2  dnz = 1, o_nz = 4
4098 .ve
4099   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4100   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4101   for proc3. i.e we are using 12+15+10=37 storage locations to store
4102   34 values.
4103 
4104   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4105   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4106   In the above case the values for `d_nnz`, `o_nnz` are
4107 .vb
4108      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4109      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4110      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4111 .ve
4112   Here the space allocated is sum of all the above values i.e 34, and
4113   hence pre-allocation is perfect.
4114 
4115   Level: intermediate
4116 
4117   Notes:
4118   If the *_nnz parameter is given then the *_nz parameter is ignored
4119 
4120   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4121   storage.  The stored row and column indices begin with zero.
4122   See [Sparse Matrices](sec_matsparse) for details.
4123 
4124   The parallel matrix is partitioned such that the first m0 rows belong to
4125   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4126   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4127 
4128   The DIAGONAL portion of the local submatrix of a processor can be defined
4129   as the submatrix which is obtained by extraction the part corresponding to
4130   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4131   first row that belongs to the processor, r2 is the last row belonging to
4132   the this processor, and c1-c2 is range of indices of the local part of a
4133   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4134   common case of a square matrix, the row and column ranges are the same and
4135   the DIAGONAL part is also square. The remaining portion of the local
4136   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4137 
4138   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4139 
4140   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4141   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4142   You can also run with the option `-info` and look for messages with the string
4143   malloc in them to see if additional memory allocation was needed.
4144 
4145 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4146           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4147 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate the header before dispatching; the actual work happens in the type-specific
     method composed under "MatMPIAIJSetPreallocation_C" (no-op for other matrix types) */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4156 
4157 /*@
4158   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4159   CSR format for the local rows.
4160 
4161   Collective
4162 
4163   Input Parameters:
4164 + comm - MPI communicator
4165 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4166 . n    - This value should be the same as the local size used in creating the
4167          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4168          calculated if `N` is given) For square matrices n is almost always `m`.
4169 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4170 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4171 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4172 . j    - global column indices
4173 - a    - optional matrix values
4174 
4175   Output Parameter:
4176 . mat - the matrix
4177 
4178   Level: intermediate
4179 
4180   Notes:
4181   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4182   thus you CANNOT change the matrix entries by changing the values of a[] after you have
4183   called this routine. Use `MatCreateMPIAIJWithSplitArray()` to avoid needing to copy the arrays.
4184 
4185   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4186 
4187   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4188 
4189   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4190   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4191 
4192   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
4194   as shown
4195 .vb
4196         1 0 0
4197         2 0 3     P0
4198        -------
4199         4 5 6     P1
4200 
4201      Process0 [P0] rows_owned=[0,1]
4202         i =  {0,1,3}  [size = nrow+1  = 2+1]
4203         j =  {0,0,2}  [size = 3]
4204         v =  {1,2,3}  [size = 3]
4205 
4206      Process1 [P1] rows_owned=[2]
4207         i =  {0,3}    [size = nrow+1  = 1+1]
4208         j =  {0,1,2}  [size = 3]
4209         v =  {4,5,6}  [size = 3]
4210 .ve
4211 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4214 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i == NULL is tolerated (no local rows/entries); otherwise the CSR offsets must start at 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies i/j/a into PETSc's internal storage and assembles the matrix */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4227 
4228 /*@
4229   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4230   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4231   from `MatCreateMPIAIJWithArrays()`
4232 
4233   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4234 
4235   Collective
4236 
4237   Input Parameters:
4238 + mat - the matrix
4239 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4240 . n   - This value should be the same as the local size used in creating the
4241        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4242        calculated if N is given) For square matrices n is almost always m.
4243 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4244 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4245 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4246 . J   - column indices
4247 - v   - matrix values
4248 
4249   Level: deprecated
4250 
4251 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4252           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4253 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;                           /* nnz: number of entries in the current CSR row */
  PetscBool       nooffprocentries;                 /* saved value of mat->nooffprocentries, restored after assembly */
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* diagonal block in SeqAIJ form */
  PetscScalar    *ad, *ao;                          /* value arrays of the diagonal (A) and off-diagonal (B) blocks */
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;                      /* row offsets of the diagonal block */
  PetscInt       *ld  = Aij->ld;                    /* ld[i]: number of off-diagonal entries in row i that precede the diagonal block */

  PetscFunctionBegin;
  /* Note: the global sizes M and N are not used by this routine; the layout of mat must already match */
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      /* In debug builds verify each row's column indices are sorted and unique;
         the split-copy below relies on that ordering */
      for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
        PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
        PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
      }
    }
    /* With sorted columns, row i of v is laid out as
       [ldi off-diagonal entries left of the diagonal block | md diagonal-block entries | nnz-ldi-md off-diagonal entries right of it] */
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* every inserted value is local, so suppress off-process handling during assembly */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4303 
4304 /*@
  MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4306 
4307   Collective
4308 
4309   Input Parameters:
4310 + mat - the matrix
4311 - v   - matrix values, stored by row
4312 
4313   Level: intermediate
4314 
4315   Notes:
4316   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4317 
4318   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4319 
4320 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4321           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4322 @*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;          /* saved value of mat->nooffprocentries, restored after assembly */
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* diagonal block */
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data; /* off-diagonal block */
  PetscScalar    *ad, *ao;                   /* value arrays of the diagonal and off-diagonal blocks */
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* row offsets; note Adj is the OFF-diagonal block's i array, despite the name */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld;              /* ld[i]: number of off-diagonal entries in row i that precede the diagonal block */

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset of the start of row i within v */
  for (i = 0; i < m; i++) {
    /* total entries in row i = entries in the diagonal block + entries in the off-diagonal block */
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    /* row i of v is laid out as [ldi | md diagonal-block entries | nnz-ldi-md]
       (column indices were sorted when the matrix was created) */
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    ad += md;
    if (ao) { /* NOTE(review): ao appears to be NULL when the off-diagonal block is empty — confirm */
      PetscCall(PetscArraycpy(ao, v + Iii, ldi));
      PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
      ao += nnz - md;
    }
    Iii += nnz;
  }
  /* every inserted value is local, so suppress off-process handling during assembly */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4366 
4367 /*@C
4368   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4369   (the default parallel PETSc format).  For good matrix assembly performance
4370   the user should preallocate the matrix storage by setting the parameters
4371   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4372 
4373   Collective
4374 
4375   Input Parameters:
4376 + comm  - MPI communicator
4377 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4378            This value should be the same as the local size used in creating the
4379            y vector for the matrix-vector product y = Ax.
4380 . n     - This value should be the same as the local size used in creating the
4381        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4382        calculated if N is given) For square matrices n is almost always m.
4383 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4384 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4385 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4386            (same value is used for all local rows)
4387 . d_nnz - array containing the number of nonzeros in the various rows of the
4388            DIAGONAL portion of the local submatrix (possibly different for each row)
4389            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4390            The size of this array is equal to the number of local rows, i.e 'm'.
4391 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4392            submatrix (same value is used for all local rows).
4393 - o_nnz - array containing the number of nonzeros in the various rows of the
4394            OFF-DIAGONAL portion of the local submatrix (possibly different for
4395            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4396            structure. The size of this array is equal to the number
4397            of local rows, i.e 'm'.
4398 
4399   Output Parameter:
4400 . A - the matrix
4401 
4402   Options Database Keys:
4403 + -mat_no_inode                     - Do not use inodes
4404 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4405 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4406         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4407         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4408 
4409   Level: intermediate
4410 
4411   Notes:
4412   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4413   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4414   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4415 
4416   If the *_nnz parameter is given then the *_nz parameter is ignored
4417 
4418   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4419   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4420   storage requirements for this matrix.
4421 
  If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.
4425 
4426   The user MUST specify either the local or global matrix dimensions
4427   (possibly both).
4428 
4429   The parallel matrix is partitioned across processors such that the
4430   first m0 rows belong to process 0, the next m1 rows belong to
4431   process 1, the next m2 rows belong to process 2 etc.. where
4432   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4433   values corresponding to [m x N] submatrix.
4434 
4435   The columns are logically partitioned with the n0 columns belonging
4436   to 0th partition, the next n1 columns belonging to the next
4437   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4438 
4439   The DIAGONAL portion of the local submatrix on any given processor
4440   is the submatrix corresponding to the rows and columns m,n
4441   corresponding to the given processor. i.e diagonal matrix on
4442   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4443   etc. The remaining portion of the local submatrix [m x (N-n)]
4444   constitute the OFF-DIAGONAL portion. The example below better
4445   illustrates this concept.
4446 
4447   For a square global matrix we define each processor's diagonal portion
4448   to be its local rows and the corresponding columns (a square submatrix);
4449   each processor's off-diagonal portion encompasses the remainder of the
4450   local matrix (a rectangular submatrix).
4451 
4452   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4453 
4454   When calling this routine with a single process communicator, a matrix of
4455   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4456   type of communicator, use the construction mechanism
4457 .vb
4458   MatCreate(..., &A);
4459   MatSetType(A, MATMPIAIJ);
4460   MatSetSizes(A, m, n, M, N);
4461   MatMPIAIJSetPreallocation(A, ...);
4462 .ve
4463 
4464   By default, this format uses inodes (identical nodes) when possible.
4465   We search for consecutive rows with the same nonzero structure, thereby
4466   reusing matrix information to achieve increased efficiency.
4467 
4468   Example Usage:
4469   Consider the following 8x8 matrix with 34 non-zero values, that is
4470   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4471   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4472   as follows
4473 
4474 .vb
4475             1  2  0  |  0  3  0  |  0  4
4476     Proc0   0  5  6  |  7  0  0  |  8  0
4477             9  0 10  | 11  0  0  | 12  0
4478     -------------------------------------
4479            13  0 14  | 15 16 17  |  0  0
4480     Proc1   0 18  0  | 19 20 21  |  0  0
4481             0  0  0  | 22 23  0  | 24  0
4482     -------------------------------------
4483     Proc2  25 26 27  |  0  0 28  | 29  0
4484            30  0  0  | 31 32 33  |  0 34
4485 .ve
4486 
4487   This can be represented as a collection of submatrices as
4488 
4489 .vb
4490       A B C
4491       D E F
4492       G H I
4493 .ve
4494 
4495   Where the submatrices A,B,C are owned by proc0, D,E,F are
4496   owned by proc1, G,H,I are owned by proc2.
4497 
4498   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4499   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4500   The 'M','N' parameters are 8,8, and have the same values on all procs.
4501 
4502   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4503   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4504   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4505   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4506   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4508 
4509   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4510   allocated for every row of the local diagonal submatrix, and `o_nz`
4511   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4513   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4514   In this case, the values of `d_nz`,`o_nz` are
4515 .vb
4516      proc0  dnz = 2, o_nz = 2
4517      proc1  dnz = 3, o_nz = 2
4518      proc2  dnz = 1, o_nz = 4
4519 .ve
4520   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4521   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
4523   34 values.
4524 
4525   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4526   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4527   In the above case the values for d_nnz,o_nnz are
4528 .vb
4529      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4530      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4531      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4532 .ve
4533   Here the space allocated is sum of all the above values i.e 34, and
4534   hence pre-allocation is perfect.
4535 
4536 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4537           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4538 @*/
4539 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4540 {
4541   PetscMPIInt size;
4542 
4543   PetscFunctionBegin;
4544   PetscCall(MatCreate(comm, A));
4545   PetscCall(MatSetSizes(*A, m, n, M, N));
4546   PetscCallMPI(MPI_Comm_size(comm, &size));
4547   if (size > 1) {
4548     PetscCall(MatSetType(*A, MATMPIAIJ));
4549     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4550   } else {
4551     PetscCall(MatSetType(*A, MATSEQAIJ));
4552     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4553   }
4554   PetscFunctionReturn(PETSC_SUCCESS);
4555 }
4556 
4557 /*MC
4558     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4559 
4560     Synopsis:
4561     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4562 
4563     Not Collective
4564 
4565     Input Parameter:
4566 .   A - the `MATMPIAIJ` matrix
4567 
4568     Output Parameters:
4569 +   Ad - the diagonal portion of the matrix
4570 .   Ao - the off-diagonal portion of the matrix
4571 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4572 -   ierr - error code
4573 
4574      Level: advanced
4575 
4576     Note:
4577     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4578 
4579 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4580 M*/
4581 
4582 /*MC
4583     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4584 
4585     Synopsis:
4586     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4587 
4588     Not Collective
4589 
4590     Input Parameters:
4591 +   A - the `MATMPIAIJ` matrix
4592 .   Ad - the diagonal portion of the matrix
4593 .   Ao - the off-diagonal portion of the matrix
4594 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4595 -   ierr - error code
4596 
4597      Level: advanced
4598 
4599 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4600 M*/
4601 
4602 /*@C
4603   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4604 
4605   Not Collective
4606 
4607   Input Parameter:
4608 . A - The `MATMPIAIJ` matrix
4609 
4610   Output Parameters:
4611 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4612 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4613 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4614 
4615   Level: intermediate
4616 
4617   Note:
4618   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4619   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is
4620   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4621   local column numbers to global column numbers in the original matrix.
4622 
4623   Fortran Notes:
4624   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4625 
4626 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4627 @*/
4628 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4629 {
4630   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4631   PetscBool   flg;
4632 
4633   PetscFunctionBegin;
4634   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4635   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4636   if (Ad) *Ad = a->A;
4637   if (Ao) *Ao = a->B;
4638   if (colmap) *colmap = a->garray;
4639   PetscFunctionReturn(PETSC_SUCCESS);
4640 }
4641 
/* Stacks (concatenates by rows) the sequential matrices inmat from all ranks of comm into the
   parallel matrix *outmat; n is the local number of columns (or PETSC_DECIDE). With
   MAT_INITIAL_MATRIX the layout and preallocation are computed first; with MAT_REUSE_MATRIX
   only the numerical values of the existing *outmat are reinserted. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* first global row of this rank's block = total rows on the preceding ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocations are called; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy each local row of inmat into the corresponding global row of *outmat */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4693 
/* Destructor for the Mat_Merge_SeqsToMPI context that MatCreateMPIAIJSumSeqAIJSymbolic()
   attaches to the matrix via a PetscContainer; frees all merge buffers and the layout */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS); /* nothing was attached */
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri[0]/buf_rj[0] hold the single contiguous buffer for all received messages;
     buf_ri/buf_rj are the arrays of per-message pointers into it */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4716 
4717 #include <../src/mat/utils/freespace.h>
4718 #include <petscbt.h>
4719 
/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills mpimat (whose parallel nonzero structure
   was built by MatCreateMPIAIJSumSeqAIJSymbolic() and stashed in a "MatMergeSeqsToMPI"
   container) with the sum over all ranks of the values of the sequential matrices seqmat.
   Values for rows owned by other ranks are shipped there with nonblocking MPI, then each
   owner merges its local values with the received ones row by row. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge context created by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;     /* row offsets of the merged local rows of mpimat */
  bj     = merge->bj;     /* column indices of the merged local rows */
  buf_ri = merge->buf_ri; /* received i-structures from the symbolic phase */
  buf_rj = merge->buf_rj; /* received j-structures from the symbolic phase */

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* values for the rows owned by proc are contiguous in aa, starting at ai[owners[proc]] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i)); /* scratch row of accumulated values; a row has at most N entries */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global index of local row i */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* two-pointer scan: every column in aj also occurs in bj_i, so advance j until they match */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] is the contiguous buffer holding all received values (allocated by PetscPostIrecvScalar()) */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4838 
4839 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4840 {
4841   Mat                  B_mpi;
4842   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4843   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4844   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4845   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4846   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4847   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4848   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4849   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4850   MPI_Status          *status;
4851   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4852   PetscBT              lnkbt;
4853   Mat_Merge_SeqsToMPI *merge;
4854   PetscContainer       container;
4855 
4856   PetscFunctionBegin;
4857   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4858 
4859   /* make sure it is a PETSc comm */
4860   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4861   PetscCallMPI(MPI_Comm_size(comm, &size));
4862   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4863 
4864   PetscCall(PetscNew(&merge));
4865   PetscCall(PetscMalloc1(size, &status));
4866 
4867   /* determine row ownership */
4868   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4869   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4870   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4871   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4872   PetscCall(PetscLayoutSetUp(merge->rowmap));
4873   PetscCall(PetscMalloc1(size, &len_si));
4874   PetscCall(PetscMalloc1(size, &merge->len_s));
4875 
4876   m      = merge->rowmap->n;
4877   owners = merge->rowmap->range;
4878 
4879   /* determine the number of messages to send, their lengths */
4880   len_s = merge->len_s;
4881 
4882   len          = 0; /* length of buf_si[] */
4883   merge->nsend = 0;
4884   for (proc = 0; proc < size; proc++) {
4885     len_si[proc] = 0;
4886     if (proc == rank) {
4887       len_s[proc] = 0;
4888     } else {
4889       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4890       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4891     }
4892     if (len_s[proc]) {
4893       merge->nsend++;
4894       nrows = 0;
4895       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4896         if (ai[i + 1] > ai[i]) nrows++;
4897       }
4898       len_si[proc] = 2 * (nrows + 1);
4899       len += len_si[proc];
4900     }
4901   }
4902 
4903   /* determine the number and length of messages to receive for ij-structure */
4904   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4905   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4906 
4907   /* post the Irecv of j-structure */
4908   PetscCall(PetscCommGetNewTag(comm, &tagj));
4909   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4910 
4911   /* post the Isend of j-structure */
4912   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4913 
4914   for (proc = 0, k = 0; proc < size; proc++) {
4915     if (!len_s[proc]) continue;
4916     i = owners[proc];
4917     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4918     k++;
4919   }
4920 
4921   /* receives and sends of j-structure are complete */
4922   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4923   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4924 
4925   /* send and recv i-structure */
4926   PetscCall(PetscCommGetNewTag(comm, &tagi));
4927   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4928 
4929   PetscCall(PetscMalloc1(len + 1, &buf_s));
4930   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4931   for (proc = 0, k = 0; proc < size; proc++) {
4932     if (!len_s[proc]) continue;
4933     /* form outgoing message for i-structure:
4934          buf_si[0]:                 nrows to be sent
4935                [1:nrows]:           row index (global)
4936                [nrows+1:2*nrows+1]: i-structure index
4937     */
4938     nrows       = len_si[proc] / 2 - 1;
4939     buf_si_i    = buf_si + nrows + 1;
4940     buf_si[0]   = nrows;
4941     buf_si_i[0] = 0;
4942     nrows       = 0;
4943     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4944       anzi = ai[i + 1] - ai[i];
4945       if (anzi) {
4946         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4947         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4948         nrows++;
4949       }
4950     }
4951     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4952     k++;
4953     buf_si += len_si[proc];
4954   }
4955 
4956   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4957   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4958 
4959   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4960   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4961 
4962   PetscCall(PetscFree(len_si));
4963   PetscCall(PetscFree(len_ri));
4964   PetscCall(PetscFree(rj_waits));
4965   PetscCall(PetscFree2(si_waits, sj_waits));
4966   PetscCall(PetscFree(ri_waits));
4967   PetscCall(PetscFree(buf_s));
4968   PetscCall(PetscFree(status));
4969 
4970   /* compute a local seq matrix in each processor */
4971   /* allocate bi array and free space for accumulating nonzero column info */
4972   PetscCall(PetscMalloc1(m + 1, &bi));
4973   bi[0] = 0;
4974 
4975   /* create and initialize a linked list */
4976   nlnk = N + 1;
4977   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4978 
4979   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4980   len = ai[owners[rank + 1]] - ai[owners[rank]];
4981   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4982 
4983   current_space = free_space;
4984 
4985   /* determine symbolic info for each local row */
4986   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4987 
4988   for (k = 0; k < merge->nrecv; k++) {
4989     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4990     nrows       = *buf_ri_k[k];
4991     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4992     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4993   }
4994 
4995   MatPreallocateBegin(comm, m, n, dnz, onz);
4996   len = 0;
4997   for (i = 0; i < m; i++) {
4998     bnzi = 0;
4999     /* add local non-zero cols of this proc's seqmat into lnk */
5000     arow = owners[rank] + i;
5001     anzi = ai[arow + 1] - ai[arow];
5002     aj   = a->j + ai[arow];
5003     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5004     bnzi += nlnk;
5005     /* add received col data into lnk */
5006     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5007       if (i == *nextrow[k]) {            /* i-th row */
5008         anzi = *(nextai[k] + 1) - *nextai[k];
5009         aj   = buf_rj[k] + *nextai[k];
5010         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5011         bnzi += nlnk;
5012         nextrow[k]++;
5013         nextai[k]++;
5014       }
5015     }
5016     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5017 
5018     /* if free space is not available, make more free space */
5019     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5020     /* copy data into free space, then initialize lnk */
5021     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5022     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5023 
5024     current_space->array += bnzi;
5025     current_space->local_used += bnzi;
5026     current_space->local_remaining -= bnzi;
5027 
5028     bi[i + 1] = bi[i] + bnzi;
5029   }
5030 
5031   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5032 
5033   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5034   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5035   PetscCall(PetscLLDestroy(lnk, lnkbt));
5036 
5037   /* create symbolic parallel matrix B_mpi */
5038   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5039   PetscCall(MatCreate(comm, &B_mpi));
5040   if (n == PETSC_DECIDE) {
5041     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5042   } else {
5043     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5044   }
5045   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5046   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5047   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5048   MatPreallocateEnd(dnz, onz);
5049   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5050 
5051   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5052   B_mpi->assembled = PETSC_FALSE;
5053   merge->bi        = bi;
5054   merge->bj        = bj;
5055   merge->buf_ri    = buf_ri;
5056   merge->buf_rj    = buf_rj;
5057   merge->coi       = NULL;
5058   merge->coj       = NULL;
5059   merge->owners_co = NULL;
5060 
5061   PetscCall(PetscCommDestroy(&comm));
5062 
5063   /* attach the supporting struct to B_mpi for reuse */
5064   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5065   PetscCall(PetscContainerSetPointer(container, merge));
5066   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5067   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5068   PetscCall(PetscContainerDestroy(&container));
5069   *mpimat = B_mpi;
5070 
5071   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5072   PetscFunctionReturn(PETSC_SUCCESS);
5073 }
5074 
5075 /*@C
5076   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5077   matrices from each processor
5078 
5079   Collective
5080 
5081   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix
5084 . m      - number of local rows (or `PETSC_DECIDE`)
5085 . n      - number of local columns (or `PETSC_DECIDE`)
5086 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5087 
5088   Output Parameter:
5089 . mpimat - the parallel matrix generated
5090 
5091   Level: advanced
5092 
5093   Note:
5094   The dimensions of the sequential matrix in each processor MUST be the same.
5095   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5096   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5097 
5098 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5099 @*/
5100 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5101 {
5102   PetscMPIInt size;
5103 
5104   PetscFunctionBegin;
5105   PetscCallMPI(MPI_Comm_size(comm, &size));
5106   if (size == 1) {
5107     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5108     if (scall == MAT_INITIAL_MATRIX) {
5109       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5110     } else {
5111       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5112     }
5113     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5114     PetscFunctionReturn(PETSC_SUCCESS);
5115   }
5116   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5117   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5118   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5119   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5120   PetscFunctionReturn(PETSC_SUCCESS);
5121 }
5122 
5123 /*@
5124   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5125 
5126   Not Collective
5127 
5128   Input Parameter:
5129 . A - the matrix
5130 
5131   Output Parameter:
5132 . A_loc - the local sequential matrix generated
5133 
5134   Level: developer
5135 
5136   Notes:
5137   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
  with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
5139   `n` is the global column count obtained with `MatGetSize()`
5140 
5141   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5142 
5143   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5144 
5145   Destroy the matrix with `MatDestroy()`
5146 
5147 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5148 @*/
5149 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5150 {
5151   PetscBool mpi;
5152 
5153   PetscFunctionBegin;
5154   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5155   if (mpi) {
5156     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5157   } else {
5158     *A_loc = A;
5159     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5160   }
5161   PetscFunctionReturn(PETSC_SUCCESS);
5162 }
5163 
5164 /*@
5165   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5166 
5167   Not Collective
5168 
5169   Input Parameters:
5170 + A     - the matrix
5171 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5172 
5173   Output Parameter:
5174 . A_loc - the local sequential matrix generated
5175 
5176   Level: developer
5177 
5178   Notes:
5179   The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5181   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5182 
5183   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5184 
5185   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5186   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5187   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5188   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5189 
5190 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5191 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept any type whose name begins with "mpiaij" (also covers derived subtypes) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single process: the diagonal block already holds the entire matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are running cursors advanced below; aav/bav keep the base pointers for the restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result holds all diagonal-block entries plus all off-diagonal-block entries of row i */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A: entries whose global column is below cstart come first */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: local columns shifted to global by cstart */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining entries (global column >= cstart) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SSELF == PETSC_COMM_SELF ? PETSC_COMM_SELF : PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* nonzero pattern unchanged: only refresh the values, walking in the same interleaved order */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A (global columns below cstart) */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A (remaining global columns) */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5296 
5297 /*@
5298   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal part
5300 
5301   Not Collective
5302 
5303   Input Parameters:
5304 + A     - the matrix
5305 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5306 
5307   Output Parameters:
5308 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5309 - A_loc - the local sequential matrix generated
5310 
5311   Level: developer
5312 
5313   Note:
5314   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5315   part, then those associated with the off-diagonal part (in its local ordering)
5316 
5317 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5318 @*/
5319 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5320 {
5321   Mat             Ao, Ad;
5322   const PetscInt *cmap;
5323   PetscMPIInt     size;
5324   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5325 
5326   PetscFunctionBegin;
5327   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5328   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5329   if (size == 1) {
5330     if (scall == MAT_INITIAL_MATRIX) {
5331       PetscCall(PetscObjectReference((PetscObject)Ad));
5332       *A_loc = Ad;
5333     } else if (scall == MAT_REUSE_MATRIX) {
5334       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5335     }
5336     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5337     PetscFunctionReturn(PETSC_SUCCESS);
5338   }
5339   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5340   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5341   if (f) {
5342     PetscCall((*f)(A, scall, glob, A_loc));
5343   } else {
5344     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5345     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5346     Mat_SeqAIJ        *c;
5347     PetscInt          *ai = a->i, *aj = a->j;
5348     PetscInt          *bi = b->i, *bj = b->j;
5349     PetscInt          *ci, *cj;
5350     const PetscScalar *aa, *ba;
5351     PetscScalar       *ca;
5352     PetscInt           i, j, am, dn, on;
5353 
5354     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5355     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5356     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5357     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5358     if (scall == MAT_INITIAL_MATRIX) {
5359       PetscInt k;
5360       PetscCall(PetscMalloc1(1 + am, &ci));
5361       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5362       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5363       ci[0] = 0;
5364       for (i = 0, k = 0; i < am; i++) {
5365         const PetscInt ncols_o = bi[i + 1] - bi[i];
5366         const PetscInt ncols_d = ai[i + 1] - ai[i];
5367         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5368         /* diagonal portion of A */
5369         for (j = 0; j < ncols_d; j++, k++) {
5370           cj[k] = *aj++;
5371           ca[k] = *aa++;
5372         }
5373         /* off-diagonal portion of A */
5374         for (j = 0; j < ncols_o; j++, k++) {
5375           cj[k] = dn + *bj++;
5376           ca[k] = *ba++;
5377         }
5378       }
5379       /* put together the new matrix */
5380       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5381       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5382       /* Since these are PETSc arrays, change flags to free them as necessary. */
5383       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5384       c->free_a  = PETSC_TRUE;
5385       c->free_ij = PETSC_TRUE;
5386       c->nonew   = 0;
5387       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5388     } else if (scall == MAT_REUSE_MATRIX) {
5389       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5390       for (i = 0; i < am; i++) {
5391         const PetscInt ncols_d = ai[i + 1] - ai[i];
5392         const PetscInt ncols_o = bi[i + 1] - bi[i];
5393         /* diagonal portion of A */
5394         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5395         /* off-diagonal portion of A */
5396         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5397       }
5398       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5399     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5400     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5401     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5402     if (glob) {
5403       PetscInt cst, *gidx;
5404 
5405       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5406       PetscCall(PetscMalloc1(dn + on, &gidx));
5407       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5408       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5409       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5410     }
5411   }
5412   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5413   PetscFunctionReturn(PETSC_SUCCESS);
5414 }
5415 
5416 /*@C
5417   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5418 
5419   Not Collective
5420 
5421   Input Parameters:
5422 + A     - the matrix
5423 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5424 . row   - index set of rows to extract (or `NULL`)
5425 - col   - index set of columns to extract (or `NULL`)
5426 
5427   Output Parameter:
5428 . A_loc - the local sequential matrix generated
5429 
5430   Level: developer
5431 
5432 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5433 @*/
5434 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5435 {
5436   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5437   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5438   IS          isrowa, iscola;
5439   Mat        *aloc;
5440   PetscBool   match;
5441 
5442   PetscFunctionBegin;
5443   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5444   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5445   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5446   if (!row) {
5447     start = A->rmap->rstart;
5448     end   = A->rmap->rend;
5449     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5450   } else {
5451     isrowa = *row;
5452   }
5453   if (!col) {
5454     start = A->cmap->rstart;
5455     cmap  = a->garray;
5456     nzA   = a->A->cmap->n;
5457     nzB   = a->B->cmap->n;
5458     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5459     ncols = 0;
5460     for (i = 0; i < nzB; i++) {
5461       if (cmap[i] < start) idx[ncols++] = cmap[i];
5462       else break;
5463     }
5464     imark = i;
5465     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5466     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5467     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5468   } else {
5469     iscola = *col;
5470   }
5471   if (scall != MAT_INITIAL_MATRIX) {
5472     PetscCall(PetscMalloc1(1, &aloc));
5473     aloc[0] = *A_loc;
5474   }
5475   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5476   if (!col) { /* attach global id of condensed columns */
5477     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5478   }
5479   *A_loc = aloc[0];
5480   PetscCall(PetscFree(aloc));
5481   if (!row) PetscCall(ISDestroy(&isrowa));
5482   if (!col) PetscCall(ISDestroy(&iscola));
5483   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5484   PetscFunctionReturn(PETSC_SUCCESS);
5485 }
5486 
5487 /*
5488  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5489  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5490  * on a global size.
5491  * */
5492 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5493 {
5494   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5495   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
5496   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5497   PetscMPIInt            owner;
5498   PetscSFNode           *iremote, *oiremote;
5499   const PetscInt        *lrowindices;
5500   PetscSF                sf, osf;
5501   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5502   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5503   MPI_Comm               comm;
5504   ISLocalToGlobalMapping mapping;
5505   const PetscScalar     *pd_a, *po_a;
5506 
5507   PetscFunctionBegin;
5508   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5509   /* plocalsize is the number of roots
5510    * nrows is the number of leaves
5511    * */
5512   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5513   PetscCall(ISGetLocalSize(rows, &nrows));
5514   PetscCall(PetscCalloc1(nrows, &iremote));
5515   PetscCall(ISGetIndices(rows, &lrowindices));
5516   for (i = 0; i < nrows; i++) {
5517     /* Find a remote index and an owner for a row
5518      * The row could be local or remote
5519      * */
5520     owner = 0;
5521     lidx  = 0;
5522     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5523     iremote[i].index = lidx;
5524     iremote[i].rank  = owner;
5525   }
5526   /* Create SF to communicate how many nonzero columns for each row */
5527   PetscCall(PetscSFCreate(comm, &sf));
5528   /* SF will figure out the number of nonzero columns for each row, and their
5529    * offsets
5530    * */
5531   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5532   PetscCall(PetscSFSetFromOptions(sf));
5533   PetscCall(PetscSFSetUp(sf));
5534 
5535   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5536   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5537   PetscCall(PetscCalloc1(nrows, &pnnz));
5538   roffsets[0] = 0;
5539   roffsets[1] = 0;
5540   for (i = 0; i < plocalsize; i++) {
5541     /* diagonal */
5542     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5543     /* off-diagonal */
5544     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5545     /* compute offsets so that we relative location for each row */
5546     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5547     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5548   }
5549   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5550   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5551   /* 'r' means root, and 'l' means leaf */
5552   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5553   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5554   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5555   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5556   PetscCall(PetscSFDestroy(&sf));
5557   PetscCall(PetscFree(roffsets));
5558   PetscCall(PetscFree(nrcols));
5559   dntotalcols = 0;
5560   ontotalcols = 0;
5561   ncol        = 0;
5562   for (i = 0; i < nrows; i++) {
5563     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5564     ncol    = PetscMax(pnnz[i], ncol);
5565     /* diagonal */
5566     dntotalcols += nlcols[i * 2 + 0];
5567     /* off-diagonal */
5568     ontotalcols += nlcols[i * 2 + 1];
5569   }
5570   /* We do not need to figure the right number of columns
5571    * since all the calculations will be done by going through the raw data
5572    * */
5573   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5574   PetscCall(MatSetUp(*P_oth));
5575   PetscCall(PetscFree(pnnz));
5576   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5577   /* diagonal */
5578   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5579   /* off-diagonal */
5580   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5581   /* diagonal */
5582   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5583   /* off-diagonal */
5584   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5585   dntotalcols = 0;
5586   ontotalcols = 0;
5587   ntotalcols  = 0;
5588   for (i = 0; i < nrows; i++) {
5589     owner = 0;
5590     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5591     /* Set iremote for diag matrix */
5592     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5593       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5594       iremote[dntotalcols].rank  = owner;
5595       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5596       ilocal[dntotalcols++] = ntotalcols++;
5597     }
5598     /* off-diagonal */
5599     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5600       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5601       oiremote[ontotalcols].rank  = owner;
5602       oilocal[ontotalcols++]      = ntotalcols++;
5603     }
5604   }
5605   PetscCall(ISRestoreIndices(rows, &lrowindices));
5606   PetscCall(PetscFree(loffsets));
5607   PetscCall(PetscFree(nlcols));
5608   PetscCall(PetscSFCreate(comm, &sf));
5609   /* P serves as roots and P_oth is leaves
5610    * Diag matrix
5611    * */
5612   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5613   PetscCall(PetscSFSetFromOptions(sf));
5614   PetscCall(PetscSFSetUp(sf));
5615 
5616   PetscCall(PetscSFCreate(comm, &osf));
5617   /* off-diagonal */
5618   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5619   PetscCall(PetscSFSetFromOptions(osf));
5620   PetscCall(PetscSFSetUp(osf));
5621   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5622   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5623   /* operate on the matrix internal data to save memory */
5624   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5625   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5626   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5627   /* Convert to global indices for diag matrix */
5628   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5629   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5630   /* We want P_oth store global indices */
5631   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5632   /* Use memory scalable approach */
5633   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5634   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5635   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5636   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5637   /* Convert back to local indices */
5638   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5639   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5640   nout = 0;
5641   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5642   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5643   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5644   /* Exchange values */
5645   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5646   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5647   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5648   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5649   /* Stop PETSc from shrinking memory */
5650   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5651   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5652   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5653   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5654   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5655   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5656   PetscCall(PetscSFDestroy(&sf));
5657   PetscCall(PetscSFDestroy(&osf));
5658   PetscFunctionReturn(PETSC_SUCCESS);
5659 }
5660 
5661 /*
5662  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5663  * This supports MPIAIJ and MAIJ
5664  * */
/*
  Gathers into *P_oth the rows of P whose global indices (after dividing by dof) appear among
  the nonzero off-diagonal columns of the local A.

  With MAT_INITIAL_MATRIX the result matrix is (re)created and the two PetscSFs describing the
  communication pattern are attached to it (by MatCreateSeqSubMatrixWithRows_Private);
  with MAT_REUSE_MATRIX only the numerical values of *P_oth are refreshed through those SFs.
*/
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* with dof > 1, several consecutive columns collapse to one node/key */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' produced the same key as the previous step (garray is sorted), reuse its slot */
        mapping[i] = count - 1;
      }
    }
    /* map translates each off-diagonal column of A to the corresponding row of *P_oth */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices)); /* keys come out of the hash map in arbitrary order */
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place; the two SFs scatter P's diag and off-diag values into the one p_oth->a array */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5737 
5738 /*@C
  MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` whose global indices equal the nonzero column indices of the local part of `A`
5740 
5741   Collective
5742 
5743   Input Parameters:
5744 + A     - the first matrix in `MATMPIAIJ` format
5745 . B     - the second matrix in `MATMPIAIJ` format
5746 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5747 
5748   Output Parameters:
5749 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5750 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5751 - B_seq - the sequential matrix generated
5752 
5753   Level: developer
5754 
5755 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5756 @*/
5757 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5758 {
5759   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5760   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5761   IS          isrowb, iscolb;
5762   Mat        *bseq = NULL;
5763 
5764   PetscFunctionBegin;
5765   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5766              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5767   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5768 
5769   if (scall == MAT_INITIAL_MATRIX) {
5770     start = A->cmap->rstart;
5771     cmap  = a->garray;
5772     nzA   = a->A->cmap->n;
5773     nzB   = a->B->cmap->n;
5774     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5775     ncols = 0;
5776     for (i = 0; i < nzB; i++) { /* row < local row index */
5777       if (cmap[i] < start) idx[ncols++] = cmap[i];
5778       else break;
5779     }
5780     imark = i;
5781     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5782     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5783     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5784     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5785   } else {
5786     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5787     isrowb = *rowb;
5788     iscolb = *colb;
5789     PetscCall(PetscMalloc1(1, &bseq));
5790     bseq[0] = *B_seq;
5791   }
5792   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5793   *B_seq = bseq[0];
5794   PetscCall(PetscFree(bseq));
5795   if (!rowb) {
5796     PetscCall(ISDestroy(&isrowb));
5797   } else {
5798     *rowb = isrowb;
5799   }
5800   if (!colb) {
5801     PetscCall(ISDestroy(&iscolb));
5802   } else {
5803     *colb = iscolb;
5804   }
5805   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5806   PetscFunctionReturn(PETSC_SUCCESS);
5807 }
5808 
5809 /*
5810     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5811     of the OFF-DIAGONAL portion of local A
5812 
5813     Collective
5814 
5815    Input Parameters:
5816 +    A,B - the matrices in `MATMPIAIJ` format
5817 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5818 
   Output Parameters:
5820 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5821 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5822 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5823 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5824 
    Developer Note:
    This directly accesses information inside the VecScatter associated with the matrix-vector product
    for this matrix. This is not desirable.
5828 
5829     Level: developer
5830 
5831 */
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *b_oth;
  VecScatter         ctx;
  MPI_Comm           comm;
  const PetscMPIInt *rprocs, *sprocs;
  const PetscInt    *srow, *rstarts, *sstarts;
  PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
  PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
  PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
  MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
  PetscMPIInt        size, tag, rank, nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  if (size == 1) {
    /* NOTE(review): these two statements assign to the local parameter variables, not
       through the pointers, so the caller's *startsj_s/*bufa_ptr are left untouched
       (and startsj_r is not touched at all) -- confirm this is the intended contract
       for the sequential case */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Reuse the communication pattern of A's matrix-vector scatter to know which rows of B to ship */
  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
  PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
  PetscCall(PetscMalloc1(nreqs, &reqs));
  /* One shared request array: receives first, then sends */
  rwaits = reqs;
  swaits = PetscSafePointerPlusOffset(reqs, nrecvs);

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*  post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i = 0; i < nrecvs; i++) {
      /* NOTE(review): the offset here is rstarts[i]*rbs, while the second pass below uses
         (rstarts[i] - rstarts[0])*rbs; the two agree only if rstarts[0] == 0 -- verify */
      rowlen = rvalues + rstarts[i] * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message */
    PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
    }
    for (i = 0; i < nsends; i++) {
      rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
      nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      for (j = 0; j < nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l = 0; l < sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */

          rowlen[j * sbs + l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));

      sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len + 1, &bufj));
    PetscCall(PetscMalloc1(len + 1, &bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn + 2, &b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i = 0; i < nrecvs; i++) {
      rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
      for (j = 0; j < nrows; j++) {
        b_othi[k + 1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j], len, &len)); /* len += rowlen[j] with overflow check */
        k++;
      }
      rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));

    /* j-array */
    /*  post receives of j-array */
    for (i = 0; i < nrecvs; i++) {
      nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i = 0; i < nsends; i++) {
      nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      bufJ  = bufj + sstartsj[i];
      for (j = 0; j < nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll = 0; ll < sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
          for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
          PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*  post receives of a-array */
  for (i = 0; i < nrecvs; i++) {
    nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i = 0; i < nsends; i++) {
    nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
    bufA  = bufa + sstartsj[i];
    for (j = 0; j < nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll = 0; ll < sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
        for (l = 0; l < ncols; l++) *bufA++ = vals[l];
        PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
      }
    }
    PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      PetscCall(PetscFree2(sstartsj, rstartsj));
      /* NOTE(review): this frees the (NULL-in-this-branch) bufa_ptr parameter, not the
         locally allocated bufa buffer -- looks like bufa leaks here; confirm */
      PetscCall(PetscFree(bufa_ptr));
    } else {
      /* Save the communication layout and the send buffer so MAT_REUSE_MATRIX can skip the setup */
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6031 
6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6035 #if defined(PETSC_HAVE_MKL_SPARSE)
6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6037 #endif
6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6040 #if defined(PETSC_HAVE_ELEMENTAL)
6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6042 #endif
6043 #if defined(PETSC_HAVE_SCALAPACK)
6044 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6045 #endif
6046 #if defined(PETSC_HAVE_HYPRE)
6047 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6048 #endif
6049 #if defined(PETSC_HAVE_CUDA)
6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6051 #endif
6052 #if defined(PETSC_HAVE_HIP)
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6054 #endif
6055 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6057 #endif
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6059 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6060 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6061 
6062 /*
6063     Computes (B'*A')' since computing B*A directly is untenable
6064 
6065                n                       p                          p
6066         [             ]       [             ]         [                 ]
6067       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6068         [             ]       [             ]         [                 ]
6069 
6070 */
6071 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6072 {
6073   Mat At, Bt, Ct;
6074 
6075   PetscFunctionBegin;
6076   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6077   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6078   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6079   PetscCall(MatDestroy(&At));
6080   PetscCall(MatDestroy(&Bt));
6081   PetscCall(MatTransposeSetPrecursor(Ct, C));
6082   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6083   PetscCall(MatDestroy(&Ct));
6084   PetscFunctionReturn(PETSC_SUCCESS);
6085 }
6086 
6087 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6088 {
6089   PetscBool cisdense;
6090 
6091   PetscFunctionBegin;
6092   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6093   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6094   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6095   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6096   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6097   PetscCall(MatSetUp(C));
6098 
6099   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6100   PetscFunctionReturn(PETSC_SUCCESS);
6101 }
6102 
6103 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6104 {
6105   Mat_Product *product = C->product;
6106   Mat          A = product->A, B = product->B;
6107 
6108   PetscFunctionBegin;
6109   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6110              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6111   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6112   C->ops->productsymbolic = MatProductSymbolic_AB;
6113   PetscFunctionReturn(PETSC_SUCCESS);
6114 }
6115 
6116 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6117 {
6118   Mat_Product *product = C->product;
6119 
6120   PetscFunctionBegin;
6121   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6122   PetscFunctionReturn(PETSC_SUCCESS);
6123 }
6124 
6125 /*
6126    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6127 
6128   Input Parameters:
6129 
6130     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6131     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6132 
6133     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6134 
6135     For Set1, j1[] contains column indices of the nonzeros.
6136     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6138     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6139 
6140     Similar for Set2.
6141 
6142     This routine merges the two sets of nonzeros row by row and removes repeats.
6143 
6144   Output Parameters: (memory is allocated by the caller)
6145 
6146     i[],j[]: the CSR of the merged matrix, which has m rows.
6147     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6148     imap2[]: similar to imap1[], but for Set2.
6149     Note we order nonzeros row-by-row and from left to right.
6150 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Standard two-way merge: both ranges are sorted, so always consume the smaller column next.
       Repeats within a set are skipped in one jump using the jmap repeat counts. */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of the two loops runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6208 
6209 /*
6210   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6211 
6212   Input Parameters:
6213     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6214     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6215       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6216 
6217       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6218       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6219 
6220   Output Parameters:
6221     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6222     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6223       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6224       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6225 
6226     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6227       Atot: number of entries belonging to the diagonal block.
6228       Annz: number of unique nonzeros belonging to the diagonal block.
6229       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6230         repeats (i.e., same 'i,j' pair).
6231       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6232         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6233 
6234       Atot: number of entries belonging to the diagonal block
6235       Annz: number of unique nonzeros belonging to the diagonal block.
6236 
6237     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6238 
6239     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6240 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1]; a single sort then puts all diag entries first */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); /* perm[] follows j[] so original entry positions are retained */
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* Advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* counters are reset and reused as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6352 
6353 /*
6354   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6355 
6356   Input Parameters:
6357     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6358     nnz:  number of unique nonzeros in the merged matrix
6359     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6360     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6361 
6362   Output Parameter: (memory is allocated by the caller)
6363     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6364 
6365   Example:
6366     nnz1 = 4
6367     nnz  = 6
6368     imap = [1,3,4,5]
6369     jmap = [0,3,5,6,7]
6370    then,
6371     jmap_new = [0,0,3,3,5,6,7]
6372 */
6373 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6374 {
6375   PetscCount k, p;
6376 
6377   PetscFunctionBegin;
6378   jmap_new[0] = 0;
6379   p           = nnz;                /* p loops over jmap_new[] backwards */
6380   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6381     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6382   }
6383   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6384   PetscFunctionReturn(PETSC_SUCCESS);
6385 }
6386 
6387 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6388 {
6389   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6390 
6391   PetscFunctionBegin;
6392   PetscCall(PetscSFDestroy(&coo->sf));
6393   PetscCall(PetscFree(coo->Aperm1));
6394   PetscCall(PetscFree(coo->Bperm1));
6395   PetscCall(PetscFree(coo->Ajmap1));
6396   PetscCall(PetscFree(coo->Bjmap1));
6397   PetscCall(PetscFree(coo->Aimap2));
6398   PetscCall(PetscFree(coo->Bimap2));
6399   PetscCall(PetscFree(coo->Aperm2));
6400   PetscCall(PetscFree(coo->Bperm2));
6401   PetscCall(PetscFree(coo->Ajmap2));
6402   PetscCall(PetscFree(coo->Bjmap2));
6403   PetscCall(PetscFree(coo->Cperm1));
6404   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6405   PetscCall(PetscFree(coo));
6406   PetscFunctionReturn(PETSC_SUCCESS);
6407 }
6408 
6409 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6410 {
6411   MPI_Comm             comm;
6412   PetscMPIInt          rank, size;
6413   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6414   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6415   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6416   PetscContainer       container;
6417   MatCOOStruct_MPIAIJ *coo;
6418 
6419   PetscFunctionBegin;
6420   PetscCall(PetscFree(mpiaij->garray));
6421   PetscCall(VecDestroy(&mpiaij->lvec));
6422 #if defined(PETSC_USE_CTABLE)
6423   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6424 #else
6425   PetscCall(PetscFree(mpiaij->colmap));
6426 #endif
6427   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6428   mat->assembled     = PETSC_FALSE;
6429   mat->was_assembled = PETSC_FALSE;
6430 
6431   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6432   PetscCallMPI(MPI_Comm_size(comm, &size));
6433   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6434   PetscCall(PetscLayoutSetUp(mat->rmap));
6435   PetscCall(PetscLayoutSetUp(mat->cmap));
6436   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6437   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6438   PetscCall(MatGetLocalSize(mat, &m, &n));
6439   PetscCall(MatGetSize(mat, &M, &N));
6440 
6441   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6442   /* entries come first, then local rows, then remote rows.                     */
6443   PetscCount n1 = coo_n, *perm1;
6444   PetscInt  *i1 = coo_i, *j1 = coo_j;
6445 
6446   PetscCall(PetscMalloc1(n1, &perm1));
6447   for (k = 0; k < n1; k++) perm1[k] = k;
6448 
6449   /* Manipulate indices so that entries with negative row or col indices will have smallest
6450      row indices, local entries will have greater but negative row indices, and remote entries
6451      will have positive row indices.
6452   */
6453   for (k = 0; k < n1; k++) {
6454     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6455     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6456     else {
6457       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6458       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6459     }
6460   }
6461 
6462   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6463   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6464 
6465   /* Advance k to the first entry we need to take care of */
6466   for (k = 0; k < n1; k++)
6467     if (i1[k] > PETSC_MIN_INT) break;
6468   PetscInt i1start = k;
6469 
6470   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6471   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6472 
6473   /*           Send remote rows to their owner                                  */
6474   /* Find which rows should be sent to which remote ranks*/
6475   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6476   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6477   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6478   const PetscInt *ranges;
6479   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6480 
6481   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6482   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6483   for (k = rem; k < n1;) {
6484     PetscMPIInt owner;
6485     PetscInt    firstRow, lastRow;
6486 
6487     /* Locate a row range */
6488     firstRow = i1[k]; /* first row of this owner */
6489     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6490     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6491 
6492     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6493     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6494 
6495     /* All entries in [k,p) belong to this remote owner */
6496     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6497       PetscMPIInt *sendto2;
6498       PetscInt    *nentries2;
6499       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6500 
6501       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6502       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6503       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6504       PetscCall(PetscFree2(sendto, nentries2));
6505       sendto   = sendto2;
6506       nentries = nentries2;
6507       maxNsend = maxNsend2;
6508     }
6509     sendto[nsend]   = owner;
6510     nentries[nsend] = p - k;
6511     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6512     nsend++;
6513     k = p;
6514   }
6515 
6516   /* Build 1st SF to know offsets on remote to send data */
6517   PetscSF      sf1;
6518   PetscInt     nroots = 1, nroots2 = 0;
6519   PetscInt     nleaves = nsend, nleaves2 = 0;
6520   PetscInt    *offsets;
6521   PetscSFNode *iremote;
6522 
6523   PetscCall(PetscSFCreate(comm, &sf1));
6524   PetscCall(PetscMalloc1(nsend, &iremote));
6525   PetscCall(PetscMalloc1(nsend, &offsets));
6526   for (k = 0; k < nsend; k++) {
6527     iremote[k].rank  = sendto[k];
6528     iremote[k].index = 0;
6529     nleaves2 += nentries[k];
6530     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6531   }
6532   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6533   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6534   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6535   PetscCall(PetscSFDestroy(&sf1));
6536   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6537 
6538   /* Build 2nd SF to send remote COOs to their owner */
6539   PetscSF sf2;
6540   nroots  = nroots2;
6541   nleaves = nleaves2;
6542   PetscCall(PetscSFCreate(comm, &sf2));
6543   PetscCall(PetscSFSetFromOptions(sf2));
6544   PetscCall(PetscMalloc1(nleaves, &iremote));
6545   p = 0;
6546   for (k = 0; k < nsend; k++) {
6547     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6548     for (q = 0; q < nentries[k]; q++, p++) {
6549       iremote[p].rank  = sendto[k];
6550       iremote[p].index = offsets[k] + q;
6551     }
6552   }
6553   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6554 
6555   /* Send the remote COOs to their owner */
6556   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6557   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6558   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6559   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6560   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6561   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6562   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6563 
6564   PetscCall(PetscFree(offsets));
6565   PetscCall(PetscFree2(sendto, nentries));
6566 
6567   /* Sort received COOs by row along with the permutation array     */
6568   for (k = 0; k < n2; k++) perm2[k] = k;
6569   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6570 
6571   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6572   PetscCount *Cperm1;
6573   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6574   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves));
6575 
6576   /* Support for HYPRE matrices, kind of a hack.
6577      Swap min column with diagonal so that diagonal values will go first */
6578   PetscBool   hypre;
6579   const char *name;
6580   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6581   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6582   if (hypre) {
6583     PetscInt *minj;
6584     PetscBT   hasdiag;
6585 
6586     PetscCall(PetscBTCreate(m, &hasdiag));
6587     PetscCall(PetscMalloc1(m, &minj));
6588     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6589     for (k = i1start; k < rem; k++) {
6590       if (j1[k] < cstart || j1[k] >= cend) continue;
6591       const PetscInt rindex = i1[k] - rstart;
6592       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6593       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6594     }
6595     for (k = 0; k < n2; k++) {
6596       if (j2[k] < cstart || j2[k] >= cend) continue;
6597       const PetscInt rindex = i2[k] - rstart;
6598       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6599       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6600     }
6601     for (k = i1start; k < rem; k++) {
6602       const PetscInt rindex = i1[k] - rstart;
6603       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6604       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6605       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6606     }
6607     for (k = 0; k < n2; k++) {
6608       const PetscInt rindex = i2[k] - rstart;
6609       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6610       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6611       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6612     }
6613     PetscCall(PetscBTDestroy(&hasdiag));
6614     PetscCall(PetscFree(minj));
6615   }
6616 
6617   /* Split local COOs and received COOs into diag/offdiag portions */
6618   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6619   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6620   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6621   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6622   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6623   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6624 
6625   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6626   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6627   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6628   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6629 
6630   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6631   PetscInt *Ai, *Bi;
6632   PetscInt *Aj, *Bj;
6633 
6634   PetscCall(PetscMalloc1(m + 1, &Ai));
6635   PetscCall(PetscMalloc1(m + 1, &Bi));
6636   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6637   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6638 
6639   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6640   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6641   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6642   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6643   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6644 
6645   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6646   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6647 
6648   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6649   /* expect nonzeros in A/B most likely have local contributing entries        */
6650   PetscInt    Annz = Ai[m];
6651   PetscInt    Bnnz = Bi[m];
6652   PetscCount *Ajmap1_new, *Bjmap1_new;
6653 
6654   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6655   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6656 
6657   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6658   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6659 
6660   PetscCall(PetscFree(Aimap1));
6661   PetscCall(PetscFree(Ajmap1));
6662   PetscCall(PetscFree(Bimap1));
6663   PetscCall(PetscFree(Bjmap1));
6664   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6665   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6666   PetscCall(PetscFree(perm1));
6667   PetscCall(PetscFree3(i2, j2, perm2));
6668 
6669   Ajmap1 = Ajmap1_new;
6670   Bjmap1 = Bjmap1_new;
6671 
6672   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6673   if (Annz < Annz1 + Annz2) {
6674     PetscInt *Aj_new;
6675     PetscCall(PetscMalloc1(Annz, &Aj_new));
6676     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6677     PetscCall(PetscFree(Aj));
6678     Aj = Aj_new;
6679   }
6680 
6681   if (Bnnz < Bnnz1 + Bnnz2) {
6682     PetscInt *Bj_new;
6683     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6684     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6685     PetscCall(PetscFree(Bj));
6686     Bj = Bj_new;
6687   }
6688 
6689   /* Create new submatrices for on-process and off-process coupling                  */
6690   PetscScalar     *Aa, *Ba;
6691   MatType          rtype;
6692   Mat_SeqAIJ      *a, *b;
6693   PetscObjectState state;
6694   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6695   PetscCall(PetscCalloc1(Bnnz, &Ba));
6696   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6697   if (cstart) {
6698     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6699   }
6700 
6701   PetscCall(MatGetRootType_Private(mat, &rtype));
6702 
6703   MatSeqXAIJGetOptions_Private(mpiaij->A);
6704   PetscCall(MatDestroy(&mpiaij->A));
6705   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6706   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6707 
6708   MatSeqXAIJGetOptions_Private(mpiaij->B);
6709   PetscCall(MatDestroy(&mpiaij->B));
6710   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6711   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6712 
6713   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6714   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6715   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6716   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6717 
6718   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6719   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6720   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6721   a->free_a = b->free_a = PETSC_TRUE;
6722   a->free_ij = b->free_ij = PETSC_TRUE;
6723 
6724   /* conversion must happen AFTER multiply setup */
6725   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6726   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6727   PetscCall(VecDestroy(&mpiaij->lvec));
6728   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6729 
6730   // Put the COO struct in a container and then attach that to the matrix
6731   PetscCall(PetscMalloc1(1, &coo));
6732   coo->n       = coo_n;
6733   coo->sf      = sf2;
6734   coo->sendlen = nleaves;
6735   coo->recvlen = nroots;
6736   coo->Annz    = Annz;
6737   coo->Bnnz    = Bnnz;
6738   coo->Annz2   = Annz2;
6739   coo->Bnnz2   = Bnnz2;
6740   coo->Atot1   = Atot1;
6741   coo->Atot2   = Atot2;
6742   coo->Btot1   = Btot1;
6743   coo->Btot2   = Btot2;
6744   coo->Ajmap1  = Ajmap1;
6745   coo->Aperm1  = Aperm1;
6746   coo->Bjmap1  = Bjmap1;
6747   coo->Bperm1  = Bperm1;
6748   coo->Aimap2  = Aimap2;
6749   coo->Ajmap2  = Ajmap2;
6750   coo->Aperm2  = Aperm2;
6751   coo->Bimap2  = Bimap2;
6752   coo->Bjmap2  = Bjmap2;
6753   coo->Bperm2  = Bperm2;
6754   coo->Cperm1  = Cperm1;
6755   // Allocate in preallocation. If not used, it has zero cost on host
6756   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6757   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6758   PetscCall(PetscContainerSetPointer(container, coo));
6759   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6760   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6761   PetscCall(PetscContainerDestroy(&container));
6762   PetscFunctionReturn(PETSC_SUCCESS);
6763 }
6764 
/* Insert (imode == INSERT_VALUES) or add values v[] into the matrix at the COO positions
   registered earlier by MatSetPreallocationCOO_MPIAIJ(). v[] is indexed in the order the
   (i,j) pairs were originally given; the cached permutation/jmap arrays translate that
   order into positions in the diag (A) and offdiag (B) CSR value arrays. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Retrieve the COO metadata attached to the matrix during preallocation */
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  /* Wait for the remote entries to arrive in recvbuf */
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2/Bimap2 map each received unique
     nonzero to its position among the nonzeros of A/B, which were already initialized above */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6828 
6829 /*MC
6830    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6831 
6832    Options Database Keys:
6833 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6834 
6835    Level: beginner
6836 
6837    Notes:
6838    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6839     in this case the values associated with the rows and columns one passes in are set to zero
6840     in the matrix
6841 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6844 
6845 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6846 M*/
/* Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the function
   table, creates the stash for off-process entries, and registers the type-specific
   operations and conversion routines via function composition. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* copy the whole operation table for this type */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map, built lazily when needed */
  b->garray      = NULL; /* global indices of the off-diagonal columns */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific implementations queried by the generic Mat interface */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other matrix types; guarded ones depend on optional packages */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6929 
6930 /*@C
6931   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6932   and "off-diagonal" part of the matrix in CSR format.
6933 
6934   Collective
6935 
6936   Input Parameters:
6937 + comm - MPI communicator
6938 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6939 . n    - This value should be the same as the local size used in creating the
6940          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6941          calculated if `N` is given) For square matrices `n` is almost always `m`.
6942 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6943 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6944 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6945 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6946 . a    - matrix values
6947 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6948 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6949 - oa   - matrix values
6950 
6951   Output Parameter:
6952 . mat - the matrix
6953 
6954   Level: advanced
6955 
6956   Notes:
6957   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6958   must free the arrays once the matrix has been destroyed and not before.
6959 
6960   The `i` and `j` indices are 0 based
6961 
6962   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6963 
6964   This sets local rows and cannot be used to set off-processor values.
6965 
6966   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6967   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6968   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6969   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6970   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6971   communication if it is known that only local entries will be set.
6972 
6973 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6974           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6975 @*/
/* See the manual page above: creates a MATMPIAIJ directly from user-provided CSR arrays
   for the diagonal (A) and off-diagonal (B) blocks. The arrays are used in place (not
   copied); the caller retains ownership and must keep them alive until the matrix is
   destroyed. Only local rows can be set this way. */
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* Mark preallocated so MatSetType's lazy preallocation machinery is bypassed */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap (do not copy) the caller's CSR arrays as the diag and offdiag sequential blocks.
     Note B is created with the full global column width; j-compaction happens in assembly. */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* All entries are local by construction, so skip the off-process stash during assembly */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7004 
/* Product context created by MatProductSymbolic_MPIAIJBACKEND and attached to
   C->product->data; consumed by MatProductNumeric_MPIAIJBACKEND and released by
   MatDestroy_MatMatMPIAIJBACKEND. It holds the local intermediate products the
   parallel product was decomposed into, plus the COO machinery used to insert
   their values into C. */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* for AB: merge product->B's local matrices before multiplying by A's diagonal block */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
7035 
7036 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7037 {
7038   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7039   PetscInt             i;
7040 
7041   PetscFunctionBegin;
7042   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7043   PetscCall(PetscFree(mmdata->bufa));
7044   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7045   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7046   PetscCall(MatDestroy(&mmdata->P_oth));
7047   PetscCall(MatDestroy(&mmdata->Bloc));
7048   PetscCall(PetscSFDestroy(&mmdata->sf));
7049   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7050   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7051   PetscCall(PetscFree(mmdata->own[0]));
7052   PetscCall(PetscFree(mmdata->own));
7053   PetscCall(PetscFree(mmdata->off[0]));
7054   PetscCall(PetscFree(mmdata->off));
7055   PetscCall(PetscFree(mmdata));
7056   PetscFunctionReturn(PETSC_SUCCESS);
7057 }
7058 
7059 /* Copy selected n entries with indices in idx[] of A to v[].
7060    If idx is NULL, copy the whole data array of A to v[]
7061  */
7062 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7063 {
7064   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7065 
7066   PetscFunctionBegin;
7067   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7068   if (f) {
7069     PetscCall((*f)(A, n, idx, v));
7070   } else {
7071     const PetscScalar *vv;
7072 
7073     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7074     if (n && idx) {
7075       PetscScalar    *w  = v;
7076       const PetscInt *oi = idx;
7077       PetscInt        j;
7078 
7079       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7080     } else {
7081       PetscCall(PetscArraycpy(v, vv, n));
7082     }
7083     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7084   }
7085   PetscFunctionReturn(PETSC_SUCCESS);
7086 }
7087 
/* Numeric phase of the backend product: refresh the intermediate local products,
   pack their nonzero values into the COO buffers laid out by the symbolic phase,
   ship off-process contributions through the SF, and insert everything into C
   with MatSetValuesCOO() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  /* reusesym is set by the symbolic phase when the API user path already filled the
     temporaries; in that case skip refreshing them on this first numeric call */
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE;

  /* run the numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* copy each (non-temporary) intermediate's values into the COO buffers:
     off[i]/own[i] delimit the index segments built by the symbolic phase */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* gather remote contributions behind the locally produced values in coo_v */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7132 
7133 /* Support for Pt * A, A * P, or Pt * A * P */
7134 #define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase of the backend product: decompose the MPI-parallel product (AB, AtB,
   or PtAP) into a small set of purely local intermediate products mp[], then precompute
   COO (row,col) coordinates mapping every nonzero of those intermediates into C, so the
   numeric phase reduces to value copies plus a MatSetValuesCOO() insertion. */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* when A is symmetric, A^t B can be computed as the cheaper A B */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine A, P, the local/global sizes of C, and whether computed entries must be
     scattered to other processes */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE;

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  /* set up C's layout and type before creating the intermediate local products */
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* create the local intermediate products; each case records how rows/cols of mp[cp]
     map into C via rmapt/cmapt (and rmapa/cmapa for the sparse, type-2 maps) */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      /* A_off * P_oth is only an input to the next product, never inserted into C directly */
      mptmp[cp] = PETSC_TRUE;
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF still gets created so the numeric phase and destroy can use it unconditionally */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7629 
/*
  MatProductSetFromOptions_MPIAIJBACKEND - decide whether the device-backend symbolic product
  (MatProductSymbolic_MPIAIJBACKEND) can be used for this product, otherwise fall back to the
  plain MPIAIJ implementation.

  The backend path is considered only for AB, AtB and PtAP products, and (when PETSc is built
  with device support) only if A and B have the same concrete type, neither operand is bound to
  the CPU, and the user did not request the CPU code via the *_backend_cpu options below.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  /* without device support there is nothing to negotiate: always take the backend branch below */
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* backend path requires both operands off the CPU and of the same concrete type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on whether the user called the old API (MatMatMult etc.) or MatProduct */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7700 
7701 /*
7702    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7703 
7704    n - the number of block indices in cc[]
7705    cc - the block indices (must be large enough to contain the indices)
7706 */
7707 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7708 {
7709   PetscInt        cnt = -1, nidx, j;
7710   const PetscInt *idx;
7711 
7712   PetscFunctionBegin;
7713   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7714   if (nidx) {
7715     cnt     = 0;
7716     cc[cnt] = idx[0] / bs;
7717     for (j = 1; j < nidx; j++) {
7718       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7719     }
7720   }
7721   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7722   *n = cnt + 1;
7723   PetscFunctionReturn(PETSC_SUCCESS);
7724 }
7725 
7726 /*
7727     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7728 
7729     ncollapsed - the number of block indices
7730     collapsed - the block indices (must be large enough to contain the indices)
7731 */
7732 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7733 {
7734   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7735 
7736   PetscFunctionBegin;
7737   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7738   for (i = start + 1; i < start + bs; i++) {
7739     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7740     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7741     cprevtmp = cprev;
7742     cprev    = merged;
7743     merged   = cprevtmp;
7744   }
7745   *ncollapsed = nprev;
7746   if (collapsed) *collapsed = cprev;
7747   PetscFunctionReturn(PETSC_SUCCESS);
7748 }
7749 
7750 /*
7751  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7752 
7753  Input Parameter:
7754  . Amat - matrix
7755  - symmetrize - make the result symmetric
7756  + scale - scale with diagonal
7757 
7758  Output Parameter:
7759  . a_Gmat - output scalar graph >= 0
7760 
7761 */
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  /* number of local block rows; assumes the local row count is a multiple of bs -- TODO confirm callers guarantee this */
  nloc = (Iend - Istart) / bs;

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* collapse each bs x bs block of Amat into one scalar entry of Gmat */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    /* fast path: requires assembled off-diagonal data (garray) for MPI matrices */
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      /* a = local (diagonal) part, b = off-diagonal part (NULL for sequential) */
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a             = d->A;
        b             = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      /* first pass: count block nonzeros per block row for preallocation, and verify every block is
         fully dense and column-aligned; otherwise bail out to the slow path below */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols1, *cols2;
        for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
          nnz[brow / bs] = nc2 / bs;
          if (nc2 % bs) ok = 0;
          /* nmax tracks the widest block row seen; sizes the AA/AJ work arrays below */
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
            if (nc1 != nc2) ok = 0;
            else {
              for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
                if (cols1[jj] != cols2[jj]) ok = 0;
                if (cols1[jj] % bs != jj % bs) ok = 0;
              }
            }
            PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
          }
          PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
            goto old_bs;
          }
        }
      }
      /* only the call matching Gmat's type takes effect; the other is a no-op */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        /* read the CSR arrays of the local part directly */
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai               = aseq->i;
        n                = ai[brow + 1] - ai[brow];
        aj               = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          if (index_size == 0) {
            for (int ii = 0; ii < bs; ii++) { // rows in block
              aa = aseq->a + ai[brow + ii] + k;
              for (int jj = 0; jj < bs; jj++) {         // columns in block
                val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
              }
            }
          } else {                                       // use (index,index) value if provided
            for (int iii = 0; iii < index_size; iii++) { // rows in block
              int ii = index[iii];
              aa     = aseq->a + ai[brow + ii] + k;
              for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
                int jj = index[jjj];
                val += PetscAbs(PetscRealPart(aa[jj]));
              }
            }
          }
          PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first sweep of the block row sets the global block-column indices and zeroes AA */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs; /* garray maps compressed local columns to global */
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          /* second sweep accumulates the per-block absolute-value norm into AA */
          if (index_size == 0) {
            for (int ii = 0; ii < bs; ii++) { // rows in block
              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (int k = 0; k < ncols; k += bs) {
                for (int jj = 0; jj < bs; jj++) { // cols in block
                  PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          } else {                                       // use (index,index) value if provided
            for (int iii = 0; iii < index_size; iii++) { // rows in block
              int ii = index[iii];
              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (int k = 0; k < ncols; k += bs) {
                for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
                  int jj = index[jjj];
                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      /* slow path: handles non-dense blocks; each scalar entry is added to its block entry with ADD_VALUES */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
       */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
         */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          /* clamp to the number of off-process block columns */
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: the graph is Amat itself (or a copy if we need to modify it below) */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a             = d->A;
      b             = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        /* NOTE(review): info.nz_used is a floating-point count compared against an int index --
           assumed < INT_MAX here; confirm for very large local matrices */
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    /* form G + G^T unless Amat is already known symmetric */
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    /* symmetric scaling: G <- D^{-1/2} G D^{-1/2} */
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
8033 
8034 /*
8035     Special version for direct calls from Fortran
8036 */
8037 #include <petsc/private/fortranimpl.h>
8038 
8039 /* Change these macros so can be used in void function */
8040 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8041 #undef PetscCall
8042 #define PetscCall(...) \
8043   do { \
8044     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8045     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8046       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8047       return; \
8048     } \
8049   } while (0)
8050 
8051 #undef SETERRQ
8052 #define SETERRQ(comm, ierr, ...) \
8053   do { \
8054     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8055     return; \
8056   } while (0)
8057 
8058 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8059   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8060 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8061   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8062 #else
8063 #endif
/*
  matsetvaluesmpiaij_ - MatSetValues for MPIAIJ matrices, callable directly from Fortran.

  All arguments arrive as pointers (Fortran pass-by-reference); errors are reported through
  *_ierr via the PetscCall/SETERRQ redefinitions above instead of a return code. The body
  mirrors the C MatSetValues path for MPIAIJ: locally-owned entries go into the diagonal (A)
  or off-diagonal (B) SeqAIJ part through the MatSetValues_SeqAIJ_*_Private macros, and
  off-process rows are stashed for communication at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* scratch state consumed/updated by the MatSetValues_SeqAIJ_*_Private macros */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      /* negative row indices are silently skipped (standard MatSetValues semantics) */
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally-owned row: set up the search state for both the A (suffix 1) and B (suffix 2) parts */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v[] may be row- or column-major depending on the matrix's roworiented option */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate global column to B's compressed local column */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-diagonal column: disassemble so B uses global column indices again */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication during MatAssemblyBegin/End */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8177 
8178 /* Undefining these here since they were redefined from their original definition above! No
8179  * other PETSc functions should be defined past this point, as it is impossible to recover the
8180  * original definitions */
8181 #undef PetscCall
8182 #undef SETERRQ
8183