xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d9acb416d05abeed0a33bde3a81aeb2ea0364f6a)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically switches over to use inodes when
  enough exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166 
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
171 {
172   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
173 
174   PetscFunctionBegin;
175   if (mat->A) {
176     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
177     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
178   }
179   PetscFunctionReturn(PETSC_SUCCESS);
180 }
181 
182 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
183 {
184   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
185   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
186   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
187   const PetscInt  *ia, *ib;
188   const MatScalar *aa, *bb, *aav, *bav;
189   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
190   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
191 
192   PetscFunctionBegin;
193   *keptrows = NULL;
194 
195   ia = a->i;
196   ib = b->i;
197   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
198   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
199   for (i = 0; i < m; i++) {
200     na = ia[i + 1] - ia[i];
201     nb = ib[i + 1] - ib[i];
202     if (!na && !nb) {
203       cnt++;
204       goto ok1;
205     }
206     aa = aav + ia[i];
207     for (j = 0; j < na; j++) {
208       if (aa[j] != 0.0) goto ok1;
209     }
210     bb = PetscSafePointerPlusOffset(bav, ib[i]);
211     for (j = 0; j < nb; j++) {
212       if (bb[j] != 0.0) goto ok1;
213     }
214     cnt++;
215   ok1:;
216   }
217   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
218   if (!n0rows) {
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
220     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
221     PetscFunctionReturn(PETSC_SUCCESS);
222   }
223   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
224   cnt = 0;
225   for (i = 0; i < m; i++) {
226     na = ia[i + 1] - ia[i];
227     nb = ib[i + 1] - ib[i];
228     if (!na && !nb) continue;
229     aa = aav + ia[i];
230     for (j = 0; j < na; j++) {
231       if (aa[j] != 0.0) {
232         rows[cnt++] = rstart + i;
233         goto ok2;
234       }
235     }
236     bb = PetscSafePointerPlusOffset(bav, ib[i]);
237     for (j = 0; j < nb; j++) {
238       if (bb[j] != 0.0) {
239         rows[cnt++] = rstart + i;
240         goto ok2;
241       }
242     }
243   ok2:;
244   }
245   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
247   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
252 {
253   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
254   PetscBool   cong;
255 
256   PetscFunctionBegin;
257   PetscCall(MatHasCongruentLayouts(Y, &cong));
258   if (Y->assembled && cong) {
259     PetscCall(MatDiagonalSet(aij->A, D, is));
260   } else {
261     PetscCall(MatDiagonalSet_Default(Y, D, is));
262   }
263   PetscFunctionReturn(PETSC_SUCCESS);
264 }
265 
266 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
267 {
268   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
269   PetscInt    i, rstart, nrows, *rows;
270 
271   PetscFunctionBegin;
272   *zrows = NULL;
273   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
274   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
275   for (i = 0; i < nrows; i++) rows[i] += rstart;
276   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
/*
  MatGetColumnReductions_MPIAIJ - computes a per-column reduction (1-, 2-, or
  infinity-norm, or sum/mean of the real or imaginary parts) over the whole
  parallel matrix.

  reductions[] must have length equal to the global number of columns; after the
  final MPI reduction every rank holds the complete result.
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  /* one accumulator slot per global column, zero-initialized */
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): these get/restore pairs discard their result; presumably they
     force the value arrays to be valid on the host before the raw a_aij->a /
     b_aij->a arrays are read directly below — confirm against MatSeqAIJGetArrayRead */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* Diagonal-block columns are local: shift by cmap->rstart to get the global column.
     Off-diagonal-block columns are compacted: garray[] maps them to global columns. */
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the per-rank partial results; max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-process: square root for the 2-norm, divide by global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
326 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
327 {
328   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
329   IS              sis, gis;
330   const PetscInt *isis, *igis;
331   PetscInt        n, *iis, nsis, ngis, rstart, i;
332 
333   PetscFunctionBegin;
334   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
335   PetscCall(MatFindNonzeroRows(a->B, &gis));
336   PetscCall(ISGetSize(gis, &ngis));
337   PetscCall(ISGetSize(sis, &nsis));
338   PetscCall(ISGetIndices(sis, &isis));
339   PetscCall(ISGetIndices(gis, &igis));
340 
341   PetscCall(PetscMalloc1(ngis + nsis, &iis));
342   PetscCall(PetscArraycpy(iis, igis, ngis));
343   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
344   n = ngis + nsis;
345   PetscCall(PetscSortRemoveDupsInt(&n, iis));
346   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
347   for (i = 0; i < n; i++) iis[i] += rstart;
348   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
349 
350   PetscCall(ISRestoreIndices(sis, &isis));
351   PetscCall(ISRestoreIndices(gis, &igis));
352   PetscCall(ISDestroy(&sis));
353   PetscCall(ISDestroy(&gis));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
357 /*
358   Local utility routine that creates a mapping from the global column
359 number to the local number in the off-diagonal part of the local
360 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each processor
has an order N integer array) but is fast to access.
363 */
364 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
365 {
366   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
367   PetscInt    n   = aij->B->cmap->n, i;
368 
369   PetscFunctionBegin;
370   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
371 #if defined(PETSC_USE_CTABLE)
372   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
373   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
374 #else
375   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
376   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
377 #endif
378   PetscFunctionReturn(PETSC_SUCCESS);
379 }
380 
/*
  MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) - inserts or adds a
  single value at local (row, col) of the diagonal block A.

  Relies on scratch variables set up by the caller (MatSetValues_MPIAIJ()): rp1/ap1
  (column-index and value pointers for the current row), nrow1/rmax1 (used/allocated
  row length), low1/high1/lastcol1 (search-window state), plus a, ai, aj, aimax, ailen,
  am, aa, nonew and ignorezeroentries. orow/ocol are the global indices, used only for
  error messages.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    /* shrink or reset the search window based on the previously inserted column */ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* binary search down to a window of at most 5 entries, then scan linearly */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        /* entry already present: add or overwrite in place */ \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* unclear whether this PetscLogFlops() measurably slows down the code */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* entry absent: optionally skip insertion of off-diagonal exact zeros */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    /* nonew == 1: new nonzero locations are silently ignored */ \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
426 
/*
  MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) - inserts or adds a
  single value at (row, col) of the off-diagonal block B.

  Relies on scratch variables set up by the caller (MatSetValues_MPIAIJ()): rp2/ap2
  (column-index and value pointers for the current row), nrow2/rmax2 (used/allocated
  row length), low2/high2/lastcol2 (search-window state), plus b, bi, bj, bimax, bilen,
  bm, ba, nonew and ignorezeroentries. Zero entries are droppable without a row != col
  test because the diagonal never lies in the off-diagonal block. orow/ocol are the
  global indices, used only for error messages.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    /* shrink or reset the search window based on the previously inserted column */ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    /* binary search down to a window of at most 5 entries, then scan linearly */ \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        /* entry already present: add or overwrite in place */ \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    /* entry absent: optionally skip insertion of exact zeros */ \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    /* nonew == 1: new nonzero locations are silently ignored */ \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
471 
/*
  MatSetValuesRow_MPIAIJ - overwrites an entire locally owned row at once.

  v must supply a value for every stored slot of the row, ordered by global column:
  first the off-diagonal entries left of the diagonal block, then the diagonal-block
  entries, then the remaining off-diagonal entries.

  NOTE(review): as the inline comment states, this assumes a square matrix, i.e. the
  row ownership start is also usable as the column comparison point below — confirm.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert the global row number to a local one */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    /* garray[] translates the off-diagonal block's local column numbers to global ones */
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  /* copy the first l values (columns left of the diagonal block) into B */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/*
  MatSetValues_MPIAIJ - MatSetValues() implementation for MPIAIJ matrices.

  Values for locally owned rows go directly into the diagonal (A) or off-diagonal (B)
  sequential block via the MatSetValues_SeqAIJ_{A,B}_Private() macros above; values
  for rows owned by other ranks are stashed and communicated during assembly.
  If v is NULL, zeros are inserted (value stays 0.0). Negative row or column indices
  are silently skipped.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  /* scratch state for the insertion macros: row pointers and binary-search windows */
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are skipped by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned row: insert directly */
      row      = im[i] - rstart;
      /* search state for this row of the diagonal block */
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      /* search state for this row of the off-diagonal block */
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column belongs to the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) { /* negative columns are skipped by convention */
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column indices; translate through colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* unknown column and B does not accept new nonzeros: skip or error per nonew */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
618 
619 /*
620     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
621     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
623 */
624 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
625 {
626   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
627   Mat         A      = aij->A; /* diagonal part of the matrix */
628   Mat         B      = aij->B; /* off-diagonal part of the matrix */
629   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
630   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
631   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
632   PetscInt   *ailen = a->ilen, *aj = a->j;
633   PetscInt   *bilen = b->ilen, *bj = b->j;
634   PetscInt    am          = aij->A->rmap->n, j;
635   PetscInt    diag_so_far = 0, dnz;
636   PetscInt    offd_so_far = 0, onz;
637 
638   PetscFunctionBegin;
639   /* Iterate over all rows of the matrix */
640   for (j = 0; j < am; j++) {
641     dnz = onz = 0;
642     /*  Iterate over all non-zero columns of the current row */
643     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
644       /* If column is in the diagonal */
645       if (mat_j[col] >= cstart && mat_j[col] < cend) {
646         aj[diag_so_far++] = mat_j[col] - cstart;
647         dnz++;
648       } else { /* off-diagonal entries */
649         bj[offd_so_far++] = mat_j[col];
650         onz++;
651       }
652     }
653     ailen[j] = dnz;
654     bilen[j] = onz;
655   }
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
659 /*
660     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
661     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
663     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
664     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
665 */
666 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
667 {
668   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
669   Mat          A    = aij->A; /* diagonal part of the matrix */
670   Mat          B    = aij->B; /* off-diagonal part of the matrix */
671   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
672   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
673   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
674   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
675   PetscInt    *ailen = a->ilen, *aj = a->j;
676   PetscInt    *bilen = b->ilen, *bj = b->j;
677   PetscInt     am          = aij->A->rmap->n, j;
678   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
679   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
680   PetscScalar *aa = a->a, *ba = b->a;
681 
682   PetscFunctionBegin;
683   /* Iterate over all rows of the matrix */
684   for (j = 0; j < am; j++) {
685     dnz_row = onz_row = 0;
686     rowstart_offd     = full_offd_i[j];
687     rowstart_diag     = full_diag_i[j];
688     /*  Iterate over all non-zero columns of the current row */
689     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
690       /* If column is in the diagonal */
691       if (mat_j[col] >= cstart && mat_j[col] < cend) {
692         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
693         aa[rowstart_diag + dnz_row] = mat_a[col];
694         dnz_row++;
695       } else { /* off-diagonal entries */
696         bj[rowstart_offd + onz_row] = mat_j[col];
697         ba[rowstart_offd + onz_row] = mat_a[col];
698         onz_row++;
699       }
700     }
701     ailen[j] = dnz_row;
702     bilen[j] = onz_row;
703   }
704   PetscFunctionReturn(PETSC_SUCCESS);
705 }
706 
/*
  MatGetValues_MPIAIJ - Extract a dense m x n subblock of values into v (row-major, v[i*n+j]).

  Only locally owned rows may be queried (an off-process row raises PETSC_ERR_SUP);
  columns may lie anywhere. Negative row or column indices are skipped. A column that
  falls outside the local column range but is absent from the off-diagonal block's
  column map yields an (unstored) zero.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        /* locally owned column: read from the diagonal block */
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* off-diagonal column: translate the global index to B's compact local index */
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--; /* colmap stores index+1 so that 0 can mean "not present" */
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* column absent from B's sparsity pattern -> the value is an unstored zero */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
740 
741 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
742 {
743   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
744   PetscInt    nstash, reallocs;
745 
746   PetscFunctionBegin;
747   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
748 
749   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
750   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
751   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
752   PetscFunctionReturn(PETSC_SUCCESS);
753 }
754 
/*
  MatAssemblyEnd_MPIAIJ - Complete assembly: receive and insert stashed off-process
  entries, assemble the diagonal (A) and off-diagonal (B) blocks, synchronize
  disassembly state across ranks, and update the collective nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain the stash: each message carries (row, col, val) triples destined for this rank */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    /* MPI_LAND over was_assembled: false on any rank means some rank disassembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* scratch arrays used by MatGetRow are invalid after the pattern may have changed */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal vector is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
833 
834 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
835 {
836   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
837 
838   PetscFunctionBegin;
839   PetscCall(MatZeroEntries(l->A));
840   PetscCall(MatZeroEntries(l->B));
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
/*
  MatZeroRows_MPIAIJ - Zero the locally owned rows listed (by global index) in rows[],
  place 'diag' on the diagonal of each zeroed row, and optionally set the right-hand
  side b so that b[row] = diag * x[row] for each zeroed row.
*/
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* congruent layouts: each zeroed row's diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved 'nonew' insertion-policy flags, restored below */
    PetscBool   nnzA, nnzB; /* 'keepnonzeropattern' flags of the two blocks */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0; /* temporarily allow new nonzero locations for the diagonal insertions */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rows beyond the column range have no diagonal entry */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA; /* restore the saved insertion policies */
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(mat->A->data))->keepnonzeropattern || !((Mat_SeqAIJ *)(mat->A->data))->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
914 
/*
  MatZeroRowsColumns_MPIAIJ - Zero both the rows and the columns given by the global
  indices rows[] (owned by any rank), put 'diag' on the diagonal of the zeroed rows,
  and, when x and b are provided, update b for the eliminated columns.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* local row count; reused below as a per-row nonzero count */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed: roots that receive no leaf keep -1,
     roots targeted by some rank become nonnegative */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* mark the zeroed rows with 1 and scatter the marks to the ghost layout of B */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with nonzeros are listed, with their true row in rindex */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column was zeroed: move its contribution to the RHS, then drop the entry */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1032 
1033 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1034 {
1035   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1036   PetscInt    nt;
1037   VecScatter  Mvctx = a->Mvctx;
1038 
1039   PetscFunctionBegin;
1040   PetscCall(VecGetLocalSize(xx, &nt));
1041   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1042   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1043   PetscUseTypeMethod(a->A, mult, xx, yy);
1044   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1045   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1046   PetscFunctionReturn(PETSC_SUCCESS);
1047 }
1048 
1049 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1050 {
1051   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1052 
1053   PetscFunctionBegin;
1054   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1055   PetscFunctionReturn(PETSC_SUCCESS);
1056 }
1057 
1058 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1059 {
1060   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1061   VecScatter  Mvctx = a->Mvctx;
1062 
1063   PetscFunctionBegin;
1064   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1065   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1066   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1067   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1068   PetscFunctionReturn(PETSC_SUCCESS);
1069 }
1070 
1071 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1072 {
1073   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1074 
1075   PetscFunctionBegin;
1076   /* do nondiagonal part */
1077   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1078   /* do local part */
1079   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1080   /* add partial results together */
1081   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1083   PetscFunctionReturn(PETSC_SUCCESS);
1084 }
1085 
/*
  MatIsTranspose_MPIAIJ - Set *f to PETSC_TRUE if Bmat equals Amat^T up to tolerance tol.
  First checks the diagonal blocks (cheap, collective); only if those pass does it
  gather the off-diagonal couplings via MatCreateSubMatrices and compare them.
*/
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); /* single rank: the diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* NOTE(review): allocation is sized with N but the second fill loop runs to M;
     these agree only when M == N — confirm this is only used for square matrices */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1125 
1126 static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1127 {
1128   PetscFunctionBegin;
1129   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1130   PetscFunctionReturn(PETSC_SUCCESS);
1131 }
1132 
1133 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1134 {
1135   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1136 
1137   PetscFunctionBegin;
1138   /* do nondiagonal part */
1139   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1140   /* do local part */
1141   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1142   /* add partial results together */
1143   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1144   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1145   PetscFunctionReturn(PETSC_SUCCESS);
1146 }
1147 
1148 /*
1149   This only works correctly for square matrices where the subblock A->A is the
1150    diagonal block
1151 */
1152 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1153 {
1154   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1155 
1156   PetscFunctionBegin;
1157   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1158   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1159   PetscCall(MatGetDiagonal(a->A, v));
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 }
1162 
1163 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1164 {
1165   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1166 
1167   PetscFunctionBegin;
1168   PetscCall(MatScale(a->A, aa));
1169   PetscCall(MatScale(a->B, aa));
1170   PetscFunctionReturn(PETSC_SUCCESS);
1171 }
1172 
1173 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1174 {
1175   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1176   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1177   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1178   const PetscInt    *garray = aij->garray;
1179   const PetscScalar *aa, *ba;
1180   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1181   PetscInt64         nz, hnz;
1182   PetscInt          *rowlens;
1183   PetscInt          *colidxs;
1184   PetscScalar       *matvals;
1185   PetscMPIInt        rank;
1186 
1187   PetscFunctionBegin;
1188   PetscCall(PetscViewerSetUp(viewer));
1189 
1190   M  = mat->rmap->N;
1191   N  = mat->cmap->N;
1192   m  = mat->rmap->n;
1193   rs = mat->rmap->rstart;
1194   cs = mat->cmap->rstart;
1195   nz = A->nz + B->nz;
1196 
1197   /* write matrix header */
1198   header[0] = MAT_FILE_CLASSID;
1199   header[1] = M;
1200   header[2] = N;
1201   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1202   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1203   if (rank == 0) {
1204     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1205     else header[3] = (PetscInt)hnz;
1206   }
1207   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1208 
1209   /* fill in and store row lengths  */
1210   PetscCall(PetscMalloc1(m, &rowlens));
1211   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1212   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1213   PetscCall(PetscFree(rowlens));
1214 
1215   /* fill in and store column indices */
1216   PetscCall(PetscMalloc1(nz, &colidxs));
1217   for (cnt = 0, i = 0; i < m; i++) {
1218     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1219       if (garray[B->j[jb]] > cs) break;
1220       colidxs[cnt++] = garray[B->j[jb]];
1221     }
1222     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1223     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1224   }
1225   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1226   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1227   PetscCall(PetscFree(colidxs));
1228 
1229   /* fill in and store nonzero values */
1230   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1231   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1232   PetscCall(PetscMalloc1(nz, &matvals));
1233   for (cnt = 0, i = 0; i < m; i++) {
1234     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1235       if (garray[B->j[jb]] > cs) break;
1236       matvals[cnt++] = ba[jb];
1237     }
1238     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1239     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1240   }
1241   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1242   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1243   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1244   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1245   PetscCall(PetscFree(matvals));
1246 
1247   /* write block size option to the viewer's .info file */
1248   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1249   PetscFunctionReturn(PETSC_SUCCESS);
1250 }
1251 
1252 #include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - View the matrix on an ASCII, draw, socket, or
  binary viewer. Info-style ASCII formats are handled in place; other formats gather
  the whole matrix onto rank 0 and view it there as a SeqAIJ matrix.
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary of local sizes, nonzeros, and I-node usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* single rank: the diagonal block is the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns, other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1377 
1378 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1379 {
1380   PetscBool iascii, isdraw, issocket, isbinary;
1381 
1382   PetscFunctionBegin;
1383   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1384   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1385   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1386   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1387   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1388   PetscFunctionReturn(PETSC_SUCCESS);
1389 }
1390 
/*
  MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

  Only the "local" sweep variants (and Eisenstat) are supported: each process
  relaxes with its diagonal block mat->A after moving the contribution of the
  off-diagonal block mat->B (which couples to off-process entries of xx) onto
  the right-hand side. Truly parallel SOR is rejected at the end.
*/
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector holding bb - B*x_offproc */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* applied entirely by the local diagonal block; no communication needed */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed unless exactly one sweep with a zero initial guess is performed
     (then the off-process part of xx is zero). Note the precedence here:
     (~flag) & SOR_ZERO_INITIAL_GUESS is nonzero exactly when the
     zero-initial-guess bit is NOT set in flag. */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* the first sweep sees x == 0, so there is no off-process contribution to subtract */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather the off-process entries of xx into mat->lvec */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward sweep from a zero initial guess */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily build and cache the diagonal of the parallel matrix */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + (omega-2)/omega * D*xx */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any factorization/zero-pivot error detected by the local solves */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1487 
/*
  MatPermute_MPIAIJ - Computes *B = P*A*Q for the permutations described by the
  index sets rowp (rows) and colp (columns).

  Strategy: invert the row and column permutations with PetscSF reductions to
  learn where each locally owned row/column lands, count the diagonal and
  off-diagonal nonzeros of each destination row for preallocation, then insert
  the permuted entries with MatSetValues.
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is scratch for both row and column passes, hence size max(m,n) */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  /* rdest[i] = global destination row of local row i */
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  /* cdest[i] = global destination column of local column i */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  /* gcols are the global columns of the compressed off-diagonal block aB;
     broadcast their destinations from the owning processes */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal (dnnz) and off-diagonal (onnz) entries per source row, then
     move the counts to the destination rows (tdnnz/tonnz) for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never assigned in this function, so this destroy
     never fires — confirm whether parallel-colp handling was removed intentionally */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1593 
1594 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1595 {
1596   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1597 
1598   PetscFunctionBegin;
1599   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1600   if (ghosts) *ghosts = aij->garray;
1601   PetscFunctionReturn(PETSC_SUCCESS);
1602 }
1603 
1604 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1605 {
1606   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1607   Mat            A = mat->A, B = mat->B;
1608   PetscLogDouble isend[5], irecv[5];
1609 
1610   PetscFunctionBegin;
1611   info->block_size = 1.0;
1612   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1613 
1614   isend[0] = info->nz_used;
1615   isend[1] = info->nz_allocated;
1616   isend[2] = info->nz_unneeded;
1617   isend[3] = info->memory;
1618   isend[4] = info->mallocs;
1619 
1620   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1621 
1622   isend[0] += info->nz_used;
1623   isend[1] += info->nz_allocated;
1624   isend[2] += info->nz_unneeded;
1625   isend[3] += info->memory;
1626   isend[4] += info->mallocs;
1627   if (flag == MAT_LOCAL) {
1628     info->nz_used      = isend[0];
1629     info->nz_allocated = isend[1];
1630     info->nz_unneeded  = isend[2];
1631     info->memory       = isend[3];
1632     info->mallocs      = isend[4];
1633   } else if (flag == MAT_GLOBAL_MAX) {
1634     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1635 
1636     info->nz_used      = irecv[0];
1637     info->nz_allocated = irecv[1];
1638     info->nz_unneeded  = irecv[2];
1639     info->memory       = irecv[3];
1640     info->mallocs      = irecv[4];
1641   } else if (flag == MAT_GLOBAL_SUM) {
1642     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1643 
1644     info->nz_used      = irecv[0];
1645     info->nz_allocated = irecv[1];
1646     info->nz_unneeded  = irecv[2];
1647     info->memory       = irecv[3];
1648     info->mallocs      = irecv[4];
1649   }
1650   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1651   info->fill_ratio_needed = 0;
1652   info->factor_mallocs    = 0;
1653   PetscFunctionReturn(PETSC_SUCCESS);
1654 }
1655 
1656 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1657 {
1658   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1659 
1660   PetscFunctionBegin;
1661   switch (op) {
1662   case MAT_NEW_NONZERO_LOCATIONS:
1663   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1664   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1665   case MAT_KEEP_NONZERO_PATTERN:
1666   case MAT_NEW_NONZERO_LOCATION_ERR:
1667   case MAT_USE_INODES:
1668   case MAT_IGNORE_ZERO_ENTRIES:
1669   case MAT_FORM_EXPLICIT_TRANSPOSE:
1670     MatCheckPreallocated(A, 1);
1671     PetscCall(MatSetOption(a->A, op, flg));
1672     PetscCall(MatSetOption(a->B, op, flg));
1673     break;
1674   case MAT_ROW_ORIENTED:
1675     MatCheckPreallocated(A, 1);
1676     a->roworiented = flg;
1677 
1678     PetscCall(MatSetOption(a->A, op, flg));
1679     PetscCall(MatSetOption(a->B, op, flg));
1680     break;
1681   case MAT_FORCE_DIAGONAL_ENTRIES:
1682   case MAT_SORTED_FULL:
1683     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1684     break;
1685   case MAT_IGNORE_OFF_PROC_ENTRIES:
1686     a->donotstash = flg;
1687     break;
1688   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1689   case MAT_SPD:
1690   case MAT_SYMMETRIC:
1691   case MAT_STRUCTURALLY_SYMMETRIC:
1692   case MAT_HERMITIAN:
1693   case MAT_SYMMETRY_ETERNAL:
1694   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1695   case MAT_SPD_ETERNAL:
1696     /* if the diagonal matrix is square it inherits some of the properties above */
1697     break;
1698   case MAT_SUBMAT_SINGLEIS:
1699     A->submat_singleis = flg;
1700     break;
1701   case MAT_STRUCTURE_ONLY:
1702     /* The option is handled directly by MatSetOption() */
1703     break;
1704   default:
1705     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1706   }
1707   PetscFunctionReturn(PETSC_SUCCESS);
1708 }
1709 
/*
  MatGetRow_MPIAIJ - Returns global row `row` (which must be owned by this
  process) with global column indices in increasing order, by merging the
  matching rows of the diagonal (A) and off-diagonal (B) sequential blocks.

  The returned arrays point into the per-matrix scratch buffers
  mat->rowvalues/mat->rowindices, so only one row may be active at a time
  (enforced via getrowactive); callers must pair this with MatRestoreRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* Request from the blocks only what the caller asked for. Note pcB stays
     non-NULL when values are requested without indices: B's column indices are
     still needed below to interleave the values in global column order. */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* maps B's compressed local columns to global columns */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* values were not requested above, so imark has not been computed yet */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  /* give the sub-rows back to the sequential blocks */
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1793 
1794 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1795 {
1796   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1797 
1798   PetscFunctionBegin;
1799   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1800   aij->getrowactive = PETSC_FALSE;
1801   PetscFunctionReturn(PETSC_SUCCESS);
1802 }
1803 
/*
  MatNorm_MPIAIJ - Computes the Frobenius norm, the 1-norm (maximum column sum)
  or the infinity norm (maximum row sum) of an MPIAIJ matrix; the 2-norm is not
  supported. Local contributions from the diagonal (A) and off-diagonal (B)
  blocks are combined with an MPI reduction over the matrix's communicator.
*/
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: the diagonal block is the whole matrix */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, reduced globally, then sqrt */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* tmp accumulates absolute sums per GLOBAL column (width cmap->N);
         garray maps B's compressed local columns to global columns */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are wholly owned, so only a max-reduction of per-process row sums is needed */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1881 
/*
  MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

  The diagonal block is transposed locally via MatTranspose (fast, no
  MatSetValues); the off-diagonal block is transposed by inserting its entries
  with MatSetValues, since its columns correspond to off-process rows of the
  result. For MAT_INPLACE_MATRIX (detected as *matout == A) the result is built
  in a fresh matrix and merged back into A at the end.
*/
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* Need a new matrix with the transposed layout: count nonzeros per column of
       A's local blocks to preallocate the rows of the transpose */
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    /* MAT_REUSE_MATRIX: reuse *matout's pattern; any new allocation is an error */
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate B's compressed local columns to global column indices */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* row i of a->B becomes column `row` of the result: insert it transposed */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's contents with B's and discard the shell */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1975 
1976 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1977 {
1978   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1979   Mat         a = aij->A, b = aij->B;
1980   PetscInt    s1, s2, s3;
1981 
1982   PetscFunctionBegin;
1983   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1984   if (rr) {
1985     PetscCall(VecGetLocalSize(rr, &s1));
1986     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1987     /* Overlap communication with computation. */
1988     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1989   }
1990   if (ll) {
1991     PetscCall(VecGetLocalSize(ll, &s1));
1992     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1993     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1994   }
1995   /* scale  the diagonal block */
1996   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1997 
1998   if (rr) {
1999     /* Do a scatter end and then right scale the off-diagonal block */
2000     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2001     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2002   }
2003   PetscFunctionReturn(PETSC_SUCCESS);
2004 }
2005 
2006 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2007 {
2008   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2009 
2010   PetscFunctionBegin;
2011   PetscCall(MatSetUnfactored(a->A));
2012   PetscFunctionReturn(PETSC_SUCCESS);
2013 }
2014 
2015 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2016 {
2017   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2018   Mat         a, b, c, d;
2019   PetscBool   flg;
2020 
2021   PetscFunctionBegin;
2022   a = matA->A;
2023   b = matA->B;
2024   c = matB->A;
2025   d = matB->B;
2026 
2027   PetscCall(MatEqual(a, c, &flg));
2028   if (flg) PetscCall(MatEqual(b, d, &flg));
2029   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2030   PetscFunctionReturn(PETSC_SUCCESS);
2031 }
2032 
2033 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2034 {
2035   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2036   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2037 
2038   PetscFunctionBegin;
2039   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2040   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2041     /* because of the column compression in the off-processor part of the matrix a->B,
2042        the number of columns in a->B and b->B may be different, hence we cannot call
2043        the MatCopy() directly on the two parts. If need be, we can provide a more
2044        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2045        then copying the submatrices */
2046     PetscCall(MatCopy_Basic(A, B, str));
2047   } else {
2048     PetscCall(MatCopy(a->A, b->A, str));
2049     PetscCall(MatCopy(a->B, b->B, str));
2050   }
2051   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2052   PetscFunctionReturn(PETSC_SUCCESS);
2053 }
2054 
2055 /*
2056    Computes the number of nonzeros per row needed for preallocation when X and Y
2057    have different nonzero structure.
2058 */
2059 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2060 {
2061   PetscInt i, j, k, nzx, nzy;
2062 
2063   PetscFunctionBegin;
2064   /* Set the number of nonzeros in the new matrix */
2065   for (i = 0; i < m; i++) {
2066     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2067     nzx    = xi[i + 1] - xi[i];
2068     nzy    = yi[i + 1] - yi[i];
2069     nnz[i] = 0;
2070     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2071       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2072       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2073       nnz[i]++;
2074     }
2075     for (; k < nzy; k++) nnz[i]++;
2076   }
2077   PetscFunctionReturn(PETSC_SUCCESS);
2078 }
2079 
2080 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2081 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2082 {
2083   PetscInt    m = Y->rmap->N;
2084   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2085   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2086 
2087   PetscFunctionBegin;
2088   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2089   PetscFunctionReturn(PETSC_SUCCESS);
2090 }
2091 
/* Y = a*X + Y for MPIAIJ matrices.
   - SAME_NONZERO_PATTERN: add blockwise in place on the diagonal and
     off-diagonal sequential blocks.
   - SUBSET_NONZERO_PATTERN (X's nonzeros are a subset of Y's): use the generic
     fallback, which inserts into Y's existing structure.
   - otherwise: preallocate a fresh matrix B with the union nonzero pattern,
     compute the sum there, then merge B's guts back into Y's header. */
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* yy->A and yy->B are sequential blocks, so rmap->N is the local row count */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    /* Count the union pattern per row: diagonal blocks share local column
       numbering; off-diagonal blocks are compared via their garray maps */
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* Replace Y's internals with B's while keeping Y's object identity */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2122 
2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2124 
2125 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2126 {
2127   PetscFunctionBegin;
2128   if (PetscDefined(USE_COMPLEX)) {
2129     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2130 
2131     PetscCall(MatConjugate_SeqAIJ(aij->A));
2132     PetscCall(MatConjugate_SeqAIJ(aij->B));
2133   }
2134   PetscFunctionReturn(PETSC_SUCCESS);
2135 }
2136 
2137 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2140 
2141   PetscFunctionBegin;
2142   PetscCall(MatRealPart(a->A));
2143   PetscCall(MatRealPart(a->B));
2144   PetscFunctionReturn(PETSC_SUCCESS);
2145 }
2146 
2147 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2148 {
2149   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2150 
2151   PetscFunctionBegin;
2152   PetscCall(MatImaginaryPart(a->A));
2153   PetscCall(MatImaginaryPart(a->B));
2154   PetscFunctionReturn(PETSC_SUCCESS);
2155 }
2156 
2157 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2158 {
2159   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2160   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2161   PetscScalar       *va, *vv;
2162   Vec                vB, vA;
2163   const PetscScalar *vb;
2164 
2165   PetscFunctionBegin;
2166   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2167   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2168 
2169   PetscCall(VecGetArrayWrite(vA, &va));
2170   if (idx) {
2171     for (i = 0; i < m; i++) {
2172       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2173     }
2174   }
2175 
2176   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2177   PetscCall(PetscMalloc1(m, &idxb));
2178   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2179 
2180   PetscCall(VecGetArrayWrite(v, &vv));
2181   PetscCall(VecGetArrayRead(vB, &vb));
2182   for (i = 0; i < m; i++) {
2183     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2184       vv[i] = vb[i];
2185       if (idx) idx[i] = a->garray[idxb[i]];
2186     } else {
2187       vv[i] = va[i];
2188       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2189     }
2190   }
2191   PetscCall(VecRestoreArrayWrite(vA, &vv));
2192   PetscCall(VecRestoreArrayWrite(vA, &va));
2193   PetscCall(VecRestoreArrayRead(vB, &vb));
2194   PetscCall(PetscFree(idxb));
2195   PetscCall(VecDestroy(&vA));
2196   PetscCall(VecDestroy(&vB));
2197   PetscFunctionReturn(PETSC_SUCCESS);
2198 }
2199 
2200 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2201 {
2202   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2203   PetscInt    m = A->rmap->n;
2204   Vec         vB, vA;
2205 
2206   PetscFunctionBegin;
2207   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2208   PetscCall(MatGetRowSumAbs(a->A, vA));
2209   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2210   PetscCall(MatGetRowSumAbs(a->B, vB));
2211   PetscCall(VecAXPY(vA, 1.0, vB));
2212   PetscCall(VecDestroy(&vB));
2213   PetscCall(VecCopy(vA, v));
2214   PetscCall(VecDestroy(&vA));
2215   PetscFunctionReturn(PETSC_SUCCESS);
2216 }
2217 
/* v[i] = min_j |A[i,j]| for each local row i; if idx[] is non-NULL it receives
   the global column where that minimum occurs.  Results from the diagonal
   block (mat->A) and the off-diagonal block (mat->B) are merged; the
   off-diagonal part must also account for implicit zeros, since any column
   absent from B's compressed column set contributes |0.0| — the smallest
   possible magnitude.  Ties are broken toward the smaller global column. */
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: every row of B consists of implicit zeros */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros in this row */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists; |0.0| is the smallest possible magnitude */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these tests compare the global column `col` and the split
         point `cstart` against the position j within the row — this appears to
         assume cmap[k] == k for leading entries; confirm the intended invariant */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) { /* empty row: the first off-diagonal column is 0, or cend when the diagonal block starts at column 0 */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal block's column range */
        }
      }
    }

    /* Scan this row's stored entries for a smaller magnitude than the candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal and off-diagonal minima; equal magnitudes keep the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* local diagonal-block column -> global */
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2333 
/* v[i] = min_j A[i,j] (comparison on real parts) for each local row i; if
   idx[] is non-NULL it receives the global column where that minimum occurs.
   Results from the diagonal block (mat->A) and the off-diagonal block (mat->B)
   are merged; implicit zeros in B mean the off-diagonal candidate starts at
   0.0, so the off-diagonal minimum is at most 0.0.  Ties are broken toward the
   smaller global column index. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: no stored entries, so report PETSC_MAX_REAL */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros in this row */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists and caps the row minimum at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these tests compare the global column `col` and the split
         point `cstart` against the position j within the row — this appears to
         assume cmap[k] == k for leading entries; confirm the intended invariant */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) { /* empty row: the first off-diagonal column is 0, or cend when the diagonal block starts at column 0 */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal block's column range */
        }
      }
    }

    /* Scan this row's stored entries for a smaller (real-part) value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal and off-diagonal minima; equal values keep the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* local diagonal-block column -> global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2449 
/* v[i] = max_j A[i,j] (comparison on real parts) for each local row i; if
   idx[] is non-NULL it receives the global column where that maximum occurs.
   Results from the diagonal block (mat->A) and the off-diagonal block (mat->B)
   are merged; implicit zeros in B mean the off-diagonal candidate starts at
   0.0, so the off-diagonal maximum is at least 0.0.  Ties are broken toward
   the smaller global column index. */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: no stored entries, so report PETSC_MIN_REAL */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros in this row */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these tests compare the global column `col` and the split
         point `cstart` against the position j within the row — this appears to
         assume cmap[k] == k for leading entries; confirm the intended invariant */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) { /* empty row: the first off-diagonal column is 0, or cend when the diagonal block starts at column 0 */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal block's column range */
        }
      }
    }

    /* Scan this row's stored entries for a larger (real-part) value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal and off-diagonal maxima; equal values keep the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* local diagonal-block column -> global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2565 
2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2567 {
2568   Mat *dummy;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2572   *newmat = *dummy;
2573   PetscCall(PetscFree(dummy));
2574   PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576 
2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2578 {
2579   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2580 
2581   PetscFunctionBegin;
2582   PetscCall(MatInvertBlockDiagonal(a->A, values));
2583   A->factorerrortype = a->A->factorerrortype;
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2588 {
2589   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2590 
2591   PetscFunctionBegin;
2592   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2593   PetscCall(MatSetRandom(aij->A, rctx));
2594   if (x->assembled) {
2595     PetscCall(MatSetRandom(aij->B, rctx));
2596   } else {
2597     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2598   }
2599   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2600   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2601   PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603 
2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2605 {
2606   PetscFunctionBegin;
2607   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2608   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2609   PetscFunctionReturn(PETSC_SUCCESS);
2610 }
2611 
2612 /*@
2613   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2614 
2615   Not Collective
2616 
2617   Input Parameter:
2618 . A - the matrix
2619 
2620   Output Parameter:
2621 . nz - the number of nonzeros
2622 
2623   Level: advanced
2624 
2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2626 @*/
2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2628 {
2629   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2630   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2631   PetscBool   isaij;
2632 
2633   PetscFunctionBegin;
2634   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2635   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2636   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2637   PetscFunctionReturn(PETSC_SUCCESS);
2638 }
2639 
2640 /*@
2641   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2642 
2643   Collective
2644 
2645   Input Parameters:
2646 + A  - the matrix
2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2648 
2649   Level: advanced
2650 
2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2652 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation if one is composed on A;
     PetscTryMethod silently does nothing for types without this method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2659 
2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2661 {
2662   PetscBool sc = PETSC_FALSE, flg;
2663 
2664   PetscFunctionBegin;
2665   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2666   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2667   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2668   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2669   PetscOptionsHeadEnd();
2670   PetscFunctionReturn(PETSC_SUCCESS);
2671 }
2672 
/* Y += a*I.  Ensures some preallocation exists for the diagonal entries
   before delegating to the generic MatShift_Basic. */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* One entry per row: enough for the locally owned part of the diagonal */
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    /* Save/restore preserves the user's new-nonzero policy across the preallocation call */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2689 
2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2696   PetscCall(MatMissingDiagonal(a->A, missing, d));
2697   if (d) {
2698     PetscInt rstart;
2699     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2700     *d += rstart;
2701   }
2702   PetscFunctionReturn(PETSC_SUCCESS);
2703 }
2704 
2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2706 {
2707   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2708 
2709   PetscFunctionBegin;
2710   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2711   PetscFunctionReturn(PETSC_SUCCESS);
2712 }
2713 
2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2715 {
2716   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2717 
2718   PetscFunctionBegin;
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2720   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
/* Operation dispatch table for MATMPIAIJ.  The bracketed numeric comments give
   the slot index within struct _MatOps; NULL entries are operations this type
   does not provide (callers fall back to generic handling or error out). */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ};
2877 
2878 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2879 {
2880   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2881 
2882   PetscFunctionBegin;
2883   PetscCall(MatStoreValues(aij->A));
2884   PetscCall(MatStoreValues(aij->B));
2885   PetscFunctionReturn(PETSC_SUCCESS);
2886 }
2887 
2888 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2889 {
2890   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2891 
2892   PetscFunctionBegin;
2893   PetscCall(MatRetrieveValues(aij->A));
2894   PetscCall(MatRetrieveValues(aij->B));
2895   PetscFunctionReturn(PETSC_SUCCESS);
2896 }
2897 
/* Preallocate the MPIAIJ matrix B: (re)create its sequential diagonal block (b->A)
   and off-diagonal block (b->B) and preallocate each with the given row lengths.
   Any previously built column map and communication structures are discarded. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* if B was in hash-based (unpreallocated) insertion mode, restore its cached ops table */
  if (B->hash_active) {
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* drop the stale global-to-local column map, garray, and scatter context;
     they will be rebuilt at the next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* recreate the off-diagonal block; on a single process it gets zero columns
     because every entry is then "diagonal" */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* recreate the diagonal block with the local row/column sizes of B */
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2940 
2941 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2942 {
2943   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2944 
2945   PetscFunctionBegin;
2946   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2947   PetscCall(PetscLayoutSetUp(B->rmap));
2948   PetscCall(PetscLayoutSetUp(B->cmap));
2949 
2950 #if defined(PETSC_USE_CTABLE)
2951   PetscCall(PetscHMapIDestroy(&b->colmap));
2952 #else
2953   PetscCall(PetscFree(b->colmap));
2954 #endif
2955   PetscCall(PetscFree(b->garray));
2956   PetscCall(VecDestroy(&b->lvec));
2957   PetscCall(VecScatterDestroy(&b->Mvctx));
2958 
2959   PetscCall(MatResetPreallocation(b->A));
2960   PetscCall(MatResetPreallocation(b->B));
2961   B->preallocated  = PETSC_TRUE;
2962   B->was_assembled = PETSC_FALSE;
2963   B->assembled     = PETSC_FALSE;
2964   PetscFunctionReturn(PETSC_SUCCESS);
2965 }
2966 
/* Duplicate an MPIAIJ matrix: create a new matrix with the same layout and type,
   copy the scalar bookkeeping fields, deep-copy the column map / garray, duplicate
   the communication structures, and duplicate the sequential blocks A and B
   (copying values or not according to cpvalues). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* MatGetRow() work arrays are not copied; rebuilt lazily */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* source is still in hash-based insertion mode; just set up the copy the same way */
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    /* deep-copy the global-to-local column map, if it exists */
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    /* deep-copy garray (global column indices of the off-diagonal block) */
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  /* carry over any composed functions (e.g. conversion routines) */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3025 
3026 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3027 {
3028   PetscBool isbinary, ishdf5;
3029 
3030   PetscFunctionBegin;
3031   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3032   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3033   /* force binary viewer to load .info file if it has not yet done so */
3034   PetscCall(PetscViewerSetUp(viewer));
3035   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3036   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3037   if (isbinary) {
3038     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3039   } else if (ishdf5) {
3040 #if defined(PETSC_HAVE_HDF5)
3041     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3042 #else
3043     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3044 #endif
3045   } else {
3046     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3047   }
3048   PetscFunctionReturn(PETSC_SUCCESS);
3049 }
3050 
/* Load an MPIAIJ matrix from a PETSc binary viewer: read the header collectively,
   fix up the layouts, read this process's row lengths / column indices / values,
   and hand the resulting local CSR data to MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global cols, total nonzeros */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices (prefix sum converts lengths to CSR offsets) */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* nz == PETSC_MAX_INT marks an unknown count in the file; skip the consistency check then */
  if (nz != PETSC_MAX_INT) {
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3103 
3104 /* Not scalable because of ISAllGather() unless getting all columns. */
3105 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3106 {
3107   IS          iscol_local;
3108   PetscBool   isstride;
3109   PetscMPIInt lisstride = 0, gisstride;
3110 
3111   PetscFunctionBegin;
3112   /* check if we are grabbing all columns*/
3113   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3114 
3115   if (isstride) {
3116     PetscInt start, len, mstart, mlen;
3117     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3118     PetscCall(ISGetLocalSize(iscol, &len));
3119     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3120     if (mstart == start && mlen - mstart == len) lisstride = 1;
3121   }
3122 
3123   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3124   if (gisstride) {
3125     PetscInt N;
3126     PetscCall(MatGetSize(mat, NULL, &N));
3127     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3128     PetscCall(ISSetIdentity(iscol_local));
3129     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3130   } else {
3131     PetscInt cbs;
3132     PetscCall(ISGetBlockSize(iscol, &cbs));
3133     PetscCall(ISAllGather(iscol, &iscol_local));
3134     PetscCall(ISSetBlockSize(iscol_local, cbs));
3135   }
3136 
3137   *isseq = iscol_local;
3138   PetscFunctionReturn(PETSC_SUCCESS);
3139 }
3140 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
+   mat - matrix
.   isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
           i.e., mat->rstart <= isrow[i] < mat->rend
-   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
           i.e., mat->cstart <= iscol[i] < mat->cend

 Output Parameters:
+   isrow_d - sequential row index set for retrieving mat->A
.   iscol_d - sequential  column index set for retrieving mat->A
.   iscol_o - sequential column index set for retrieving mat->B
-   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
 */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of ncols gives this process's offset in the global iscol */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  /* mark selected columns: x holds the global column index, cmap holds the column's
     position in the submatrix; unselected entries stay at -1 */
  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d (takes ownership of idx via PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: shift row indices to local numbering */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries that stayed at -1 were not selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 transfers to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3254 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed there by the
       MAT_INITIAL_MATRIX branch below on the previous call) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M (takes ownership of Asub and Bsub) */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* walk the (sorted) garrays in lockstep to keep only the surviving columns */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3347 
/* Top-level MPIAIJ submatrix extraction: collectively decides, based on whether
   isrow/iscol share mat's row/column distribution, which of the specialized
   implementations to dispatch to; falls back to the nonscalable gather-based path. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* on reuse, recover the decision from the objects composed on *newmat by the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* collective AND: the fast paths are taken only if ALL processes qualify */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* not sorted: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash iscol_local on the result so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3447 
/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* map B's local column indices to global indices, in place */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew wraps B's existing i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer ownership of the shared arrays from B to Bnew before destroying B,
     so MatDestroy(&B) does not free them */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3542 
3543 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3544 
/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - Extracts the parallel submatrix mat[isrow, iscol] when the
  rows of the submatrix keep the same parallel row distribution that isrow requests on each process.

  Input Parameters:
+ mat         - the MPIAIJ matrix
. isrow       - rows to extract (local portion on each process)
. iscol       - columns to extract
. iscol_local - iscol gathered onto each process; must be sorted and may contain duplicate indices.
                Only consumed when call == MAT_INITIAL_MATRIX
- call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameter:
. newmat - the submatrix

  Note:
  With MAT_INITIAL_MATRIX, the intermediate objects (sequential submatrix "SubMatrix", local column
  IS "SubIScol", column map "Subcmap", and optionally "ISAllGather") are composed on *newmat so that
  a later MAT_REUSE_MATRIX call can skip the symbolic phase and only redo the numeric fill.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the objects composed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    /* refill the cached sequential submatrix with the current numerical values of mat */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* the special case must hold on every process to be used */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      /* all columns are kept, so the column map is the identity (a stride IS) */
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* merge-walk the sorted iscol_local against the diagonal column range [cstart,cend) and the
         sorted off-diagonal column list garray; keep each column that exists locally together with
         its position i in the submatrix column space */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            /* advance in garray but never past its last entry */
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      /* iscol_sub: global columns of mat present on this process; idx ownership passes to the IS */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      /* iscmap: for each kept column, its column index in the submatrix; cmap1 ownership passes to the IS */
      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of the local column counts yields this process's range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* cmap maps Msub's local column numbers to the submatrix's global columns */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* translate to submatrix global column numbers */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    /* composing takes its own reference, so the local references can be dropped right away */
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3754 
3755 /*
3756     Not great since it makes two copies of the submatrix, first an SeqAIJ
3757   in local and then by concatenating the local matrices the end result.
3758   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3759 
3760   This requires a sequential iscol with all indices.
3761 */
/*
  MatCreateSubMatrix_MPIAIJ_nonscalable - Extracts mat[isrow, iscol] by first building a sequential
  submatrix on each process and then inserting its rows into a new parallel matrix.

  csize is the requested number of local columns of the result, or PETSC_DECIDE.
  With MAT_INITIAL_MATRIX, the sequential submatrix is composed on the result as "SubMatrix" so a
  later MAT_REUSE_MATRIX call can refill it without redoing the symbolic work.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the special case must hold on every process to be used */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* refill the sequential submatrix cached on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of the local column counts yields this process's range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* insert row i of the sequential submatrix as global row rstart+i; cwork/vwork walk the CSR arrays */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    /* composing takes its own reference; drop the local one */
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3888 
3889 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3890 {
3891   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3892   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3893   const PetscInt *JJ;
3894   PetscBool       nooffprocentries;
3895   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3896 
3897   PetscFunctionBegin;
3898   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3899 
3900   PetscCall(PetscLayoutSetUp(B->rmap));
3901   PetscCall(PetscLayoutSetUp(B->cmap));
3902   m      = B->rmap->n;
3903   cstart = B->cmap->rstart;
3904   cend   = B->cmap->rend;
3905   rstart = B->rmap->rstart;
3906 
3907   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3908 
3909   if (PetscDefined(USE_DEBUG)) {
3910     for (i = 0; i < m; i++) {
3911       nnz = Ii[i + 1] - Ii[i];
3912       JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
3913       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3914       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3915       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3916     }
3917   }
3918 
3919   for (i = 0; i < m; i++) {
3920     nnz     = Ii[i + 1] - Ii[i];
3921     JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
3922     nnz_max = PetscMax(nnz_max, nnz);
3923     d       = 0;
3924     for (j = 0; j < nnz; j++) {
3925       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3926     }
3927     d_nnz[i] = d;
3928     o_nnz[i] = nnz - d;
3929   }
3930   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3931   PetscCall(PetscFree2(d_nnz, o_nnz));
3932 
3933   for (i = 0; i < m; i++) {
3934     ii = i + rstart;
3935     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
3936   }
3937   nooffprocentries    = B->nooffprocentries;
3938   B->nooffprocentries = PETSC_TRUE;
3939   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3940   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3941   B->nooffprocentries = nooffprocentries;
3942 
3943   /* count number of entries below block diagonal */
3944   PetscCall(PetscFree(Aij->ld));
3945   PetscCall(PetscCalloc1(m, &ld));
3946   Aij->ld = ld;
3947   for (i = 0; i < m; i++) {
3948     nnz = Ii[i + 1] - Ii[i];
3949     j   = 0;
3950     while (j < nnz && J[j] < cstart) j++;
3951     ld[i] = j;
3952     if (J) J += nnz;
3953   }
3954 
3955   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3956   PetscFunctionReturn(PETSC_SUCCESS);
3957 }
3958 
3959 /*@
3960   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3961   (the default parallel PETSc format).
3962 
3963   Collective
3964 
3965   Input Parameters:
3966 + B - the matrix
3967 . i - the indices into j for the start of each local row (starts with zero)
3968 . j - the column indices for each local row (starts with zero)
3969 - v - optional values in the matrix
3970 
3971   Level: developer
3972 
3973   Notes:
3974   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3975   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3976   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3977 
3978   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3979 
3980   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3981 
3982   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3983 
3984   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3985   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3986 
  The format used for the sparse matrix input is equivalent to a
  row-major ordering, i.e., for the following matrix the expected input data is
  as shown
3990 .vb
3991         1 0 0
3992         2 0 3     P0
3993        -------
3994         4 5 6     P1
3995 
3996      Process0 [P0] rows_owned=[0,1]
3997         i =  {0,1,3}  [size = nrow+1  = 2+1]
3998         j =  {0,0,2}  [size = 3]
3999         v =  {1,2,3}  [size = 3]
4000 
4001      Process1 [P1] rows_owned=[2]
4002         i =  {0,3}    [size = nrow+1  = 1+1]
4003         j =  {0,1,2}  [size = 3]
4004         v =  {4,5,6}  [size = 3]
4005 .ve
4006 
4007 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4008           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4009 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the implementation composed as "MatMPIAIJSetPreallocationCSR_C"
     (MatMPIAIJSetPreallocationCSR_MPIAIJ for MATMPIAIJ); by PETSc convention PetscTryMethod
     is a no-op for matrix types that do not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4016 
4017 /*@C
4018   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4019   (the default parallel PETSc format).  For good matrix assembly performance
4020   the user should preallocate the matrix storage by setting the parameters
4021   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4022 
4023   Collective
4024 
4025   Input Parameters:
4026 + B     - the matrix
4027 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4028            (same value is used for all local rows)
4029 . d_nnz - array containing the number of nonzeros in the various rows of the
4030            DIAGONAL portion of the local submatrix (possibly different for each row)
4031            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4032            The size of this array is equal to the number of local rows, i.e 'm'.
4033            For matrices that will be factored, you must leave room for (and set)
4034            the diagonal entry even if it is zero.
4035 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4036            submatrix (same value is used for all local rows).
4037 - o_nnz - array containing the number of nonzeros in the various rows of the
4038            OFF-DIAGONAL portion of the local submatrix (possibly different for
4039            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4040            structure. The size of this array is equal to the number
4041            of local rows, i.e 'm'.
4042 
4043   Example Usage:
4044   Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4046   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4047   as follows
4048 
4049 .vb
4050             1  2  0  |  0  3  0  |  0  4
4051     Proc0   0  5  6  |  7  0  0  |  8  0
4052             9  0 10  | 11  0  0  | 12  0
4053     -------------------------------------
4054            13  0 14  | 15 16 17  |  0  0
4055     Proc1   0 18  0  | 19 20 21  |  0  0
4056             0  0  0  | 22 23  0  | 24  0
4057     -------------------------------------
4058     Proc2  25 26 27  |  0  0 28  | 29  0
4059            30  0  0  | 31 32 33  |  0 34
4060 .ve
4061 
4062   This can be represented as a collection of submatrices as
4063 .vb
4064       A B C
4065       D E F
4066       G H I
4067 .ve
4068 
4069   Where the submatrices A,B,C are owned by proc0, D,E,F are
4070   owned by proc1, G,H,I are owned by proc2.
4071 
4072   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4073   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4074   The 'M','N' parameters are 8,8, and have the same values on all procs.
4075 
4076   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4077   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4078   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4079   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4080   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4082 
4083   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4084   allocated for every row of the local diagonal submatrix, and `o_nz`
4085   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per
  local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4088   In this case, the values of `d_nz`, `o_nz` are
4089 .vb
4090      proc0  dnz = 2, o_nz = 2
4091      proc1  dnz = 3, o_nz = 2
4092      proc2  dnz = 1, o_nz = 4
4093 .ve
4094   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4095   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
4097   34 values.
4098 
4099   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4100   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4101   In the above case the values for `d_nnz`, `o_nnz` are
4102 .vb
4103      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4104      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4105      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4106 .ve
4107   Here the space allocated is sum of all the above values i.e 34, and
4108   hence pre-allocation is perfect.
4109 
4110   Level: intermediate
4111 
4112   Notes:
4113   If the *_nnz parameter is given then the *_nz parameter is ignored
4114 
4115   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4116   storage.  The stored row and column indices begin with zero.
4117   See [Sparse Matrices](sec_matsparse) for details.
4118 
4119   The parallel matrix is partitioned such that the first m0 rows belong to
4120   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4121   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4122 
4123   The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
4128   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4129   common case of a square matrix, the row and column ranges are the same and
4130   the DIAGONAL part is also square. The remaining portion of the local
4131   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4132 
4133   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4134 
4135   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4136   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4137   You can also run with the option `-info` and look for messages with the string
4138   malloc in them to see if additional memory allocation was needed.
4139 
4140 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4141           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4142 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate the object before dispatching; PetscValidType() requires the matrix type to have been set */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the implementation composed as "MatMPIAIJSetPreallocation_C"; by PETSc convention
     PetscTryMethod is a no-op for matrix types that do not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4151 
4152 /*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain
  the local rows in standard CSR format.
4155 
4156   Collective
4157 
4158   Input Parameters:
4159 + comm - MPI communicator
4160 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4161 . n    - This value should be the same as the local size used in creating the
4162        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4163        calculated if N is given) For square matrices n is almost always m.
4164 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4165 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4166 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4167 . j    - global column indices
4168 - a    - optional matrix values
4169 
4170   Output Parameter:
4171 . mat - the matrix
4172 
4173   Level: intermediate
4174 
4175   Notes:
4176   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4177   thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4179 
4180   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4181 
4182   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4183 
4184   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4185   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4186 
  The format used for the sparse matrix input is equivalent to a
  row-major ordering, i.e., for the following matrix the expected input data is
  as shown
4190 .vb
4191         1 0 0
4192         2 0 3     P0
4193        -------
4194         4 5 6     P1
4195 
4196      Process0 [P0] rows_owned=[0,1]
4197         i =  {0,1,3}  [size = nrow+1  = 2+1]
4198         j =  {0,0,2}  [size = 3]
4199         v =  {1,2,3}  [size = 3]
4200 
4201      Process1 [P1] rows_owned=[2]
4202         i =  {0,3}    [size = nrow+1  = 1+1]
4203         j =  {0,1,2}  [size = 3]
4204         v =  {4,5,6}  [size = 3]
4205 .ve
4206 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4208           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4209 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* NOTE(review): i == NULL passes this check, yet MatMPIAIJSetPreallocationCSR_MPIAIJ()
     dereferences Ii[0] unconditionally -- presumably callers always pass a valid i; confirm */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies the CSR arrays into the matrix, preallocates, inserts the values, and assembles */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4222 
4223 /*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain
  the local rows in standard CSR format. Only the numerical values are updated; the other arrays
  must be identical to what was passed to `MatCreateMPIAIJWithArrays()`
4227 
4228   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4229 
4230   Collective
4231 
4232   Input Parameters:
4233 + mat - the matrix
4234 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4235 . n   - This value should be the same as the local size used in creating the
4236        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4237        calculated if N is given) For square matrices n is almost always m.
4238 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4239 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4240 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4241 . J   - column indices
4242 - v   - matrix values
4243 
4244   Level: deprecated
4245 
4246 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4247           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4248 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;   /* row pointers of the diagonal block Aij->A */
  PetscInt       *ld  = Aij->ld; /* ld[i]: number of entries of row i in the off-diagonal block whose global column precedes this rank's diagonal block */

  PetscFunctionBegin;
  /* the CSR arrays must describe the same local layout the matrix was created with */
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      /* debug builds verify each row's column indices are sorted and unique; the value split below relies on that ordering */
      for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
        PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
        PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
      }
    }
    nnz = Ii[i + 1] - Ii[i]; /* total entries of row i in the input CSR */
    Iii = Ii[i];             /* offset of row i in v */
    ldi = ld[i];             /* off-diagonal entries left of the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries belonging to the diagonal block */
    /* row i of v is laid out [off-diag left | diagonal block | off-diag right]; split it into the A and B value arrays */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* all values were written directly into the local blocks, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  /* NOTE(review): ad/ao were advanced past their base addresses in the loop; assumes the Restore routines do not require the original base pointer — confirm */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4298 
4299 /*@
4300   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4301 
4302   Collective
4303 
4304   Input Parameters:
4305 + mat - the matrix
4306 - v   - matrix values, stored by row
4307 
4308   Level: intermediate
4309 
4310   Notes:
4311   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4312 
4313   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4314 
4315 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4316           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4317 @*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* row pointers of the diagonal (A) and off-diagonal (B) blocks; note Adj indexes Ao despite its name */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld; /* ld[i]: number of entries of row i in B whose global column precedes this rank's diagonal block */

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset of the current row in v; the row lengths come from the stored pattern, not from caller arrays */
  for (i = 0; i < m; i++) {
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; /* total entries of row i across both blocks */
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i]; /* entries belonging to the diagonal block */
    /* row i of v is laid out [off-diag left | diagonal block | off-diag right]; split it into the A and B value arrays */
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    ad += md;
    if (ao) { /* B may hold no entries (e.g. single-rank runs), in which case ao is NULL */
      PetscCall(PetscArraycpy(ao, v + Iii, ldi));
      PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
      ao += nnz - md;
    }
    Iii += nnz;
  }
  /* all values were written directly into the local blocks, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  /* NOTE(review): ad/ao were advanced past their base addresses in the loop; assumes the Restore routines do not require the original base pointer — confirm */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4361 
4362 /*@C
4363   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4364   (the default parallel PETSc format).  For good matrix assembly performance
4365   the user should preallocate the matrix storage by setting the parameters
4366   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4367 
4368   Collective
4369 
4370   Input Parameters:
4371 + comm  - MPI communicator
4372 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4373            This value should be the same as the local size used in creating the
4374            y vector for the matrix-vector product y = Ax.
4375 . n     - This value should be the same as the local size used in creating the
4376        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4377        calculated if N is given) For square matrices n is almost always m.
4378 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4379 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4380 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4381            (same value is used for all local rows)
4382 . d_nnz - array containing the number of nonzeros in the various rows of the
4383            DIAGONAL portion of the local submatrix (possibly different for each row)
4384            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4385            The size of this array is equal to the number of local rows, i.e 'm'.
4386 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4387            submatrix (same value is used for all local rows).
4388 - o_nnz - array containing the number of nonzeros in the various rows of the
4389            OFF-DIAGONAL portion of the local submatrix (possibly different for
4390            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4391            structure. The size of this array is equal to the number
4392            of local rows, i.e 'm'.
4393 
4394   Output Parameter:
4395 . A - the matrix
4396 
4397   Options Database Keys:
4398 + -mat_no_inode                     - Do not use inodes
4399 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4400 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4401         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4402         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4403 
4404   Level: intermediate
4405 
4406   Notes:
4407   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4408   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4409   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4410 
4411   If the *_nnz parameter is given then the *_nz parameter is ignored
4412 
4413   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4414   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4415   storage requirements for this matrix.
4416 
4417   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
4419   that argument.
4420 
4421   The user MUST specify either the local or global matrix dimensions
4422   (possibly both).
4423 
4424   The parallel matrix is partitioned across processors such that the
4425   first m0 rows belong to process 0, the next m1 rows belong to
4426   process 1, the next m2 rows belong to process 2 etc.. where
4427   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4428   values corresponding to [m x N] submatrix.
4429 
4430   The columns are logically partitioned with the n0 columns belonging
4431   to 0th partition, the next n1 columns belonging to the next
4432   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4433 
4434   The DIAGONAL portion of the local submatrix on any given processor
4435   is the submatrix corresponding to the rows and columns m,n
4436   corresponding to the given processor. i.e diagonal matrix on
4437   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4438   etc. The remaining portion of the local submatrix [m x (N-n)]
4439   constitute the OFF-DIAGONAL portion. The example below better
4440   illustrates this concept.
4441 
4442   For a square global matrix we define each processor's diagonal portion
4443   to be its local rows and the corresponding columns (a square submatrix);
4444   each processor's off-diagonal portion encompasses the remainder of the
4445   local matrix (a rectangular submatrix).
4446 
4447   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4448 
4449   When calling this routine with a single process communicator, a matrix of
4450   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4451   type of communicator, use the construction mechanism
4452 .vb
4453   MatCreate(..., &A);
4454   MatSetType(A, MATMPIAIJ);
4455   MatSetSizes(A, m, n, M, N);
4456   MatMPIAIJSetPreallocation(A, ...);
4457 .ve
4458 
4459   By default, this format uses inodes (identical nodes) when possible.
4460   We search for consecutive rows with the same nonzero structure, thereby
4461   reusing matrix information to achieve increased efficiency.
4462 
4463   Example Usage:
4464   Consider the following 8x8 matrix with 34 non-zero values, that is
4465   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4466   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4467   as follows
4468 
4469 .vb
4470             1  2  0  |  0  3  0  |  0  4
4471     Proc0   0  5  6  |  7  0  0  |  8  0
4472             9  0 10  | 11  0  0  | 12  0
4473     -------------------------------------
4474            13  0 14  | 15 16 17  |  0  0
4475     Proc1   0 18  0  | 19 20 21  |  0  0
4476             0  0  0  | 22 23  0  | 24  0
4477     -------------------------------------
4478     Proc2  25 26 27  |  0  0 28  | 29  0
4479            30  0  0  | 31 32 33  |  0 34
4480 .ve
4481 
4482   This can be represented as a collection of submatrices as
4483 
4484 .vb
4485       A B C
4486       D E F
4487       G H I
4488 .ve
4489 
4490   Where the submatrices A,B,C are owned by proc0, D,E,F are
4491   owned by proc1, G,H,I are owned by proc2.
4492 
4493   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4494   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4495   The 'M','N' parameters are 8,8, and have the same values on all procs.
4496 
4497   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4498   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4499   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4500   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4501   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another SeqAIJ matrix.
4503 
4504   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4505   allocated for every row of the local diagonal submatrix, and `o_nz`
4506   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4508   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4509   In this case, the values of `d_nz`,`o_nz` are
4510 .vb
4511      proc0  dnz = 2, o_nz = 2
4512      proc1  dnz = 3, o_nz = 2
4513      proc2  dnz = 1, o_nz = 4
4514 .ve
4515   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4516   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
4518   34 values.
4519 
4520   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4521   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4522   In the above case the values for d_nnz,o_nnz are
4523 .vb
4524      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4525      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4526      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4527 .ve
4528   Here the space allocated is sum of all the above values i.e 34, and
4529   hence pre-allocation is perfect.
4530 
4531 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4532           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4533 @*/
4534 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4535 {
4536   PetscMPIInt size;
4537 
4538   PetscFunctionBegin;
4539   PetscCall(MatCreate(comm, A));
4540   PetscCall(MatSetSizes(*A, m, n, M, N));
4541   PetscCallMPI(MPI_Comm_size(comm, &size));
4542   if (size > 1) {
4543     PetscCall(MatSetType(*A, MATMPIAIJ));
4544     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4545   } else {
4546     PetscCall(MatSetType(*A, MATSEQAIJ));
4547     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4548   }
4549   PetscFunctionReturn(PETSC_SUCCESS);
4550 }
4551 
4552 /*MC
4553     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4554 
4555     Synopsis:
4556     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4557 
4558     Not Collective
4559 
4560     Input Parameter:
4561 .   A - the `MATMPIAIJ` matrix
4562 
4563     Output Parameters:
4564 +   Ad - the diagonal portion of the matrix
4565 .   Ao - the off-diagonal portion of the matrix
4566 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4567 -   ierr - error code
4568 
4569      Level: advanced
4570 
4571     Note:
4572     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4573 
4574 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4575 M*/
4576 
4577 /*MC
4578     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4579 
4580     Synopsis:
4581     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4582 
4583     Not Collective
4584 
4585     Input Parameters:
4586 +   A - the `MATMPIAIJ` matrix
4587 .   Ad - the diagonal portion of the matrix
4588 .   Ao - the off-diagonal portion of the matrix
4589 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4590 -   ierr - error code
4591 
4592      Level: advanced
4593 
4594 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4595 M*/
4596 
4597 /*@C
4598   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4599 
4600   Not Collective
4601 
4602   Input Parameter:
4603 . A - The `MATMPIAIJ` matrix
4604 
4605   Output Parameters:
4606 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4607 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4608 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4609 
4610   Level: intermediate
4611 
4612   Note:
4613   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4614   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is
4615   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4616   local column numbers to global column numbers in the original matrix.
4617 
4618   Fortran Notes:
4619   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4620 
4621 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4622 @*/
4623 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4624 {
4625   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4626   PetscBool   flg;
4627 
4628   PetscFunctionBegin;
4629   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4630   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4631   if (Ad) *Ad = a->A;
4632   if (Ao) *Ao = a->B;
4633   if (colmap) *colmap = a->garray;
4634   PetscFunctionReturn(PETSC_SUCCESS);
4635 }
4636 
4637 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4638 {
4639   PetscInt     m, N, i, rstart, nnz, Ii;
4640   PetscInt    *indx;
4641   PetscScalar *values;
4642   MatType      rootType;
4643 
4644   PetscFunctionBegin;
4645   PetscCall(MatGetSize(inmat, &m, &N));
4646   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4647     PetscInt *dnz, *onz, sum, bs, cbs;
4648 
4649     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4650     /* Check sum(n) = N */
4651     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4652     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4653 
4654     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4655     rstart -= m;
4656 
4657     MatPreallocateBegin(comm, m, n, dnz, onz);
4658     for (i = 0; i < m; i++) {
4659       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4660       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4661       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4662     }
4663 
4664     PetscCall(MatCreate(comm, outmat));
4665     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4666     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4667     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4668     PetscCall(MatGetRootType_Private(inmat, &rootType));
4669     PetscCall(MatSetType(*outmat, rootType));
4670     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4671     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4672     MatPreallocateEnd(dnz, onz);
4673     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4674   }
4675 
4676   /* numeric phase */
4677   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4678   for (i = 0; i < m; i++) {
4679     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4680     Ii = i + rstart;
4681     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4682     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4683   }
4684   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4685   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4686   PetscFunctionReturn(PETSC_SUCCESS);
4687 }
4688 
/* Container destructor for the Mat_Merge_SeqsToMPI context attached to a matrix
   by MatCreateMPIAIJSumSeqAIJSymbolic(); frees all buffers owned by the context. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(merge->id_r));  /* ranks of incoming messages */
  PetscCall(PetscFree(merge->len_s)); /* lengths of messages sent */
  PetscCall(PetscFree(merge->len_r)); /* lengths of messages received */
  PetscCall(PetscFree(merge->bi));    /* merged CSR row pointers */
  PetscCall(PetscFree(merge->bj));    /* merged CSR column indices */
  /* buf_ri/buf_rj are pointer arrays; slot 0 points to the start of the single
     contiguous allocation backing all received messages, so it is freed first */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4711 
4712 #include <../src/mat/utils/freespace.h>
4713 #include <petscbt.h>
4714 
/* Numeric phase of merging per-rank sequential AIJ matrices into one parallel AIJ
   matrix: each rank sends the values of the rows it does not own to the owning rank,
   then every rank accumulates its local values plus all received contributions. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj; /* received i-structures (row info) and j-structures (column indices), built by the symbolic phase */
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i; /* received value buffers; accumulator for one merged row */
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge context stashed on mpimat by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi; /* CSR structure of the merged local rows */
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range; /* owners[p]..owners[p+1] = global rows owned by rank p */
  len_s  = merge->len_s;         /* len_s[p] = number of values this rank sends to rank p */

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* rows destined for [proc] are contiguous in seqmat, so one send covers them all */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  /* set up a cursor into each received i-structure: current row number and current row offset */
  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row number */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge-scan: both aj and bj_i are sorted and aj is a subset of bj_i, so advance
       through bj_i and accumulate whenever the columns match */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] points to the start of the single allocation backing all received value buffers */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4833 
4834 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4835 {
4836   Mat                  B_mpi;
4837   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4838   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4839   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4840   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4841   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4842   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4843   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4844   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4845   MPI_Status          *status;
4846   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4847   PetscBT              lnkbt;
4848   Mat_Merge_SeqsToMPI *merge;
4849   PetscContainer       container;
4850 
4851   PetscFunctionBegin;
4852   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4853 
4854   /* make sure it is a PETSc comm */
4855   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4856   PetscCallMPI(MPI_Comm_size(comm, &size));
4857   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4858 
4859   PetscCall(PetscNew(&merge));
4860   PetscCall(PetscMalloc1(size, &status));
4861 
4862   /* determine row ownership */
4863   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4864   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4865   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4866   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4867   PetscCall(PetscLayoutSetUp(merge->rowmap));
4868   PetscCall(PetscMalloc1(size, &len_si));
4869   PetscCall(PetscMalloc1(size, &merge->len_s));
4870 
4871   m      = merge->rowmap->n;
4872   owners = merge->rowmap->range;
4873 
4874   /* determine the number of messages to send, their lengths */
4875   len_s = merge->len_s;
4876 
4877   len          = 0; /* length of buf_si[] */
4878   merge->nsend = 0;
4879   for (proc = 0; proc < size; proc++) {
4880     len_si[proc] = 0;
4881     if (proc == rank) {
4882       len_s[proc] = 0;
4883     } else {
4884       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4885       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4886     }
4887     if (len_s[proc]) {
4888       merge->nsend++;
4889       nrows = 0;
4890       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4891         if (ai[i + 1] > ai[i]) nrows++;
4892       }
4893       len_si[proc] = 2 * (nrows + 1);
4894       len += len_si[proc];
4895     }
4896   }
4897 
4898   /* determine the number and length of messages to receive for ij-structure */
4899   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4900   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4901 
4902   /* post the Irecv of j-structure */
4903   PetscCall(PetscCommGetNewTag(comm, &tagj));
4904   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4905 
4906   /* post the Isend of j-structure */
4907   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4908 
4909   for (proc = 0, k = 0; proc < size; proc++) {
4910     if (!len_s[proc]) continue;
4911     i = owners[proc];
4912     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4913     k++;
4914   }
4915 
4916   /* receives and sends of j-structure are complete */
4917   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4918   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4919 
4920   /* send and recv i-structure */
4921   PetscCall(PetscCommGetNewTag(comm, &tagi));
4922   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4923 
4924   PetscCall(PetscMalloc1(len + 1, &buf_s));
4925   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4926   for (proc = 0, k = 0; proc < size; proc++) {
4927     if (!len_s[proc]) continue;
4928     /* form outgoing message for i-structure:
4929          buf_si[0]:                 nrows to be sent
4930                [1:nrows]:           row index (global)
4931                [nrows+1:2*nrows+1]: i-structure index
4932     */
4933     nrows       = len_si[proc] / 2 - 1;
4934     buf_si_i    = buf_si + nrows + 1;
4935     buf_si[0]   = nrows;
4936     buf_si_i[0] = 0;
4937     nrows       = 0;
4938     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4939       anzi = ai[i + 1] - ai[i];
4940       if (anzi) {
4941         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4942         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4943         nrows++;
4944       }
4945     }
4946     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4947     k++;
4948     buf_si += len_si[proc];
4949   }
4950 
4951   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4952   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4953 
4954   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4955   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4956 
4957   PetscCall(PetscFree(len_si));
4958   PetscCall(PetscFree(len_ri));
4959   PetscCall(PetscFree(rj_waits));
4960   PetscCall(PetscFree2(si_waits, sj_waits));
4961   PetscCall(PetscFree(ri_waits));
4962   PetscCall(PetscFree(buf_s));
4963   PetscCall(PetscFree(status));
4964 
4965   /* compute a local seq matrix in each processor */
4966   /* allocate bi array and free space for accumulating nonzero column info */
4967   PetscCall(PetscMalloc1(m + 1, &bi));
4968   bi[0] = 0;
4969 
4970   /* create and initialize a linked list */
4971   nlnk = N + 1;
4972   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4973 
4974   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4975   len = ai[owners[rank + 1]] - ai[owners[rank]];
4976   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4977 
4978   current_space = free_space;
4979 
4980   /* determine symbolic info for each local row */
4981   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4982 
4983   for (k = 0; k < merge->nrecv; k++) {
4984     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4985     nrows       = *buf_ri_k[k];
4986     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4987     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4988   }
4989 
4990   MatPreallocateBegin(comm, m, n, dnz, onz);
4991   len = 0;
4992   for (i = 0; i < m; i++) {
4993     bnzi = 0;
4994     /* add local non-zero cols of this proc's seqmat into lnk */
4995     arow = owners[rank] + i;
4996     anzi = ai[arow + 1] - ai[arow];
4997     aj   = a->j + ai[arow];
4998     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4999     bnzi += nlnk;
5000     /* add received col data into lnk */
5001     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5002       if (i == *nextrow[k]) {            /* i-th row */
5003         anzi = *(nextai[k] + 1) - *nextai[k];
5004         aj   = buf_rj[k] + *nextai[k];
5005         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5006         bnzi += nlnk;
5007         nextrow[k]++;
5008         nextai[k]++;
5009       }
5010     }
5011     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5012 
5013     /* if free space is not available, make more free space */
5014     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5015     /* copy data into free space, then initialize lnk */
5016     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5017     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5018 
5019     current_space->array += bnzi;
5020     current_space->local_used += bnzi;
5021     current_space->local_remaining -= bnzi;
5022 
5023     bi[i + 1] = bi[i] + bnzi;
5024   }
5025 
5026   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5027 
5028   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5029   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5030   PetscCall(PetscLLDestroy(lnk, lnkbt));
5031 
5032   /* create symbolic parallel matrix B_mpi */
5033   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5034   PetscCall(MatCreate(comm, &B_mpi));
5035   if (n == PETSC_DECIDE) {
5036     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5037   } else {
5038     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5039   }
5040   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5041   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5042   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5043   MatPreallocateEnd(dnz, onz);
5044   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5045 
5046   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5047   B_mpi->assembled = PETSC_FALSE;
5048   merge->bi        = bi;
5049   merge->bj        = bj;
5050   merge->buf_ri    = buf_ri;
5051   merge->buf_rj    = buf_rj;
5052   merge->coi       = NULL;
5053   merge->coj       = NULL;
5054   merge->owners_co = NULL;
5055 
5056   PetscCall(PetscCommDestroy(&comm));
5057 
5058   /* attach the supporting struct to B_mpi for reuse */
5059   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5060   PetscCall(PetscContainerSetPointer(container, merge));
5061   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5062   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5063   PetscCall(PetscContainerDestroy(&container));
5064   *mpimat = B_mpi;
5065 
5066   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5067   PetscFunctionReturn(PETSC_SUCCESS);
5068 }
5069 
5070 /*@C
5071   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5072   matrices from each processor
5073 
5074   Collective
5075 
5076   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix
5079 . m      - number of local rows (or `PETSC_DECIDE`)
5080 . n      - number of local columns (or `PETSC_DECIDE`)
5081 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5082 
5083   Output Parameter:
5084 . mpimat - the parallel matrix generated
5085 
5086   Level: advanced
5087 
5088   Note:
5089   The dimensions of the sequential matrix in each processor MUST be the same.
5090   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5091   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5092 
5093 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5094 @*/
5095 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5096 {
5097   PetscMPIInt size;
5098 
5099   PetscFunctionBegin;
5100   PetscCallMPI(MPI_Comm_size(comm, &size));
5101   if (size == 1) {
5102     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5103     if (scall == MAT_INITIAL_MATRIX) {
5104       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5105     } else {
5106       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5107     }
5108     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5109     PetscFunctionReturn(PETSC_SUCCESS);
5110   }
5111   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5112   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5113   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5114   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5115   PetscFunctionReturn(PETSC_SUCCESS);
5116 }
5117 
5118 /*@
5119   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5120 
5121   Not Collective
5122 
5123   Input Parameter:
5124 . A - the matrix
5125 
5126   Output Parameter:
5127 . A_loc - the local sequential matrix generated
5128 
5129   Level: developer
5130 
5131   Notes:
5132   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5133   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5134   `n` is the global column count obtained with `MatGetSize()`
5135 
5136   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5137 
5138   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5139 
5140   Destroy the matrix with `MatDestroy()`
5141 
5142 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5143 @*/
5144 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5145 {
5146   PetscBool mpi;
5147 
5148   PetscFunctionBegin;
5149   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5150   if (mpi) {
5151     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5152   } else {
5153     *A_loc = A;
5154     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5155   }
5156   PetscFunctionReturn(PETSC_SUCCESS);
5157 }
5158 
5159 /*@
5160   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5161 
5162   Not Collective
5163 
5164   Input Parameters:
5165 + A     - the matrix
5166 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5167 
5168   Output Parameter:
5169 . A_loc - the local sequential matrix generated
5170 
5171   Level: developer
5172 
5173   Notes:
  The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5177 
5178   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5179 
5180   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5181   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5182   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5183   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5184 
5185 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5186 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;                       /* aa/ba walk the data; aav/bav kept for the restore calls */
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* Strbeginswith (not TypeCompare) so MPIAIJ subclasses (e.g. device types) are accepted too */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Uniprocessor: the diagonal block already is the whole matrix (see the manpage note
       about the returned reference aliasing A in this case) */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  /* a = diagonal block (local column numbering), b = off-diagonal block (compressed column numbering) */
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers of the merged matrix: each row holds its diag + off-diag entries */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* Merge each row in ascending global column order: off-diag columns left of the
       diagonal block, then the diagonal block, then the remaining off-diag columns.
       aj/aa and bj/ba advance monotonically across rows. */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (global columns < cstart) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break; /* rest of this row's off-diag columns lie right of the diag block */
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (shift local columns to global with cstart) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (global columns >= cend; jo continues where the first loop stopped) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Pattern unchanged: only refresh the values, in the same merge order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5291 
5292 /*@
5293   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5294   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5295 
5296   Not Collective
5297 
5298   Input Parameters:
5299 + A     - the matrix
5300 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5301 
5302   Output Parameters:
5303 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5304 - A_loc - the local sequential matrix generated
5305 
5306   Level: developer
5307 
5308   Note:
5309   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5310   part, then those associated with the off-diagonal part (in its local ordering)
5311 
5312 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5313 @*/
5314 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5315 {
5316   Mat             Ao, Ad;
5317   const PetscInt *cmap;
5318   PetscMPIInt     size;
5319   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5320 
5321   PetscFunctionBegin;
5322   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5323   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5324   if (size == 1) {
5325     if (scall == MAT_INITIAL_MATRIX) {
5326       PetscCall(PetscObjectReference((PetscObject)Ad));
5327       *A_loc = Ad;
5328     } else if (scall == MAT_REUSE_MATRIX) {
5329       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5330     }
5331     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5332     PetscFunctionReturn(PETSC_SUCCESS);
5333   }
5334   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5335   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5336   if (f) {
5337     PetscCall((*f)(A, scall, glob, A_loc));
5338   } else {
5339     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5340     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5341     Mat_SeqAIJ        *c;
5342     PetscInt          *ai = a->i, *aj = a->j;
5343     PetscInt          *bi = b->i, *bj = b->j;
5344     PetscInt          *ci, *cj;
5345     const PetscScalar *aa, *ba;
5346     PetscScalar       *ca;
5347     PetscInt           i, j, am, dn, on;
5348 
5349     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5350     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5351     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5352     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5353     if (scall == MAT_INITIAL_MATRIX) {
5354       PetscInt k;
5355       PetscCall(PetscMalloc1(1 + am, &ci));
5356       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5357       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5358       ci[0] = 0;
5359       for (i = 0, k = 0; i < am; i++) {
5360         const PetscInt ncols_o = bi[i + 1] - bi[i];
5361         const PetscInt ncols_d = ai[i + 1] - ai[i];
5362         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5363         /* diagonal portion of A */
5364         for (j = 0; j < ncols_d; j++, k++) {
5365           cj[k] = *aj++;
5366           ca[k] = *aa++;
5367         }
5368         /* off-diagonal portion of A */
5369         for (j = 0; j < ncols_o; j++, k++) {
5370           cj[k] = dn + *bj++;
5371           ca[k] = *ba++;
5372         }
5373       }
5374       /* put together the new matrix */
5375       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5376       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5377       /* Since these are PETSc arrays, change flags to free them as necessary. */
5378       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5379       c->free_a  = PETSC_TRUE;
5380       c->free_ij = PETSC_TRUE;
5381       c->nonew   = 0;
5382       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5383     } else if (scall == MAT_REUSE_MATRIX) {
5384       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5385       for (i = 0; i < am; i++) {
5386         const PetscInt ncols_d = ai[i + 1] - ai[i];
5387         const PetscInt ncols_o = bi[i + 1] - bi[i];
5388         /* diagonal portion of A */
5389         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5390         /* off-diagonal portion of A */
5391         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5392       }
5393       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5394     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5395     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5396     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5397     if (glob) {
5398       PetscInt cst, *gidx;
5399 
5400       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5401       PetscCall(PetscMalloc1(dn + on, &gidx));
5402       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5403       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5404       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5405     }
5406   }
5407   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5408   PetscFunctionReturn(PETSC_SUCCESS);
5409 }
5410 
5411 /*@C
5412   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5413 
5414   Not Collective
5415 
5416   Input Parameters:
5417 + A     - the matrix
5418 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5419 . row   - index set of rows to extract (or `NULL`)
5420 - col   - index set of columns to extract (or `NULL`)
5421 
5422   Output Parameter:
5423 . A_loc - the local sequential matrix generated
5424 
5425   Level: developer
5426 
5427 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5428 @*/
5429 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5430 {
5431   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5432   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5433   IS          isrowa, iscola;
5434   Mat        *aloc;
5435   PetscBool   match;
5436 
5437   PetscFunctionBegin;
5438   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5439   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5440   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5441   if (!row) {
5442     start = A->rmap->rstart;
5443     end   = A->rmap->rend;
5444     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5445   } else {
5446     isrowa = *row;
5447   }
5448   if (!col) {
5449     start = A->cmap->rstart;
5450     cmap  = a->garray;
5451     nzA   = a->A->cmap->n;
5452     nzB   = a->B->cmap->n;
5453     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5454     ncols = 0;
5455     for (i = 0; i < nzB; i++) {
5456       if (cmap[i] < start) idx[ncols++] = cmap[i];
5457       else break;
5458     }
5459     imark = i;
5460     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5461     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5462     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5463   } else {
5464     iscola = *col;
5465   }
5466   if (scall != MAT_INITIAL_MATRIX) {
5467     PetscCall(PetscMalloc1(1, &aloc));
5468     aloc[0] = *A_loc;
5469   }
5470   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5471   if (!col) { /* attach global id of condensed columns */
5472     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5473   }
5474   *A_loc = aloc[0];
5475   PetscCall(PetscFree(aloc));
5476   if (!row) PetscCall(ISDestroy(&isrowa));
5477   if (!col) PetscCall(ISDestroy(&iscola));
5478   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5479   PetscFunctionReturn(PETSC_SUCCESS);
5480 }
5481 
5482 /*
5483  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5484  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5485  * on a global size.
5486  * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per owned row, record diag/off-diag nonzero counts and their running offsets.
   * Entries are interleaved in pairs (hence the 2*) so one MPIU_2INT broadcast
   * moves both the diagonal and off-diagonal value for a row at once. */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  /* This first SF has done its job (counts/offsets); the per-entry SFs below get new graphs */
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  /* Total number of diag/off-diag entries to receive, and the widest row (used as
   * the column bound of the sequential matrix below) */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* Build two entry-level SFs: one gathers the diagonal-block entries of each
   * requested row, the other the off-diagonal entries. Leaf indices (ilocal/oilocal)
   * interleave so each row of P_oth stores its diag entries followed by its off-diag
   * entries, contiguously in p_oth->a / p_oth->j. */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix
   * NOTE: pd->j and po->j are rewritten IN PLACE to global indices for the broadcast
   * and converted back afterwards; the strict Begin/End ordering below keeps each
   * buffer valid for the duration of its own communication. */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Restore po->j to local indices; every global index must map back (IS_GTOLM_DROP drops none here) */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5655 
5656 /*
5657  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5658  * This supports MPIAIJ and MAIJ
5659  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp; /* hash map: block-row key (garray[i]/dof) -> compressed (unique) index */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf; /* SFs attached to *P_oth for refreshing values from P's diag/offdiag parts */
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; mapping[] records, for each off-diag
       column of A, which unique key (and hence which row of *P_oth) it corresponds to */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that  a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dof consecutive global columns collapse to one key (MAIJ support) */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    /* Extract the unique keys and sort them; these are the rows of P to fetch */
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier (composed as "diagsf"/"offdiagsf").
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place; both broadcasts target disjoint slots of p_oth->a */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5732 
5733 /*@C
  MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` that correspond to nonzero columns of local `A`
5735 
5736   Collective
5737 
5738   Input Parameters:
5739 + A     - the first matrix in `MATMPIAIJ` format
5740 . B     - the second matrix in `MATMPIAIJ` format
5741 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5742 
5743   Output Parameters:
5744 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5745 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5746 - B_seq - the sequential matrix generated
5747 
5748   Level: developer
5749 
5750 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5751 @*/
5752 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5753 {
5754   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5755   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5756   IS          isrowb, iscolb;
5757   Mat        *bseq = NULL;
5758 
5759   PetscFunctionBegin;
5760   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5761              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5762   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5763 
5764   if (scall == MAT_INITIAL_MATRIX) {
5765     start = A->cmap->rstart;
5766     cmap  = a->garray;
5767     nzA   = a->A->cmap->n;
5768     nzB   = a->B->cmap->n;
5769     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5770     ncols = 0;
5771     for (i = 0; i < nzB; i++) { /* row < local row index */
5772       if (cmap[i] < start) idx[ncols++] = cmap[i];
5773       else break;
5774     }
5775     imark = i;
5776     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5777     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5778     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5779     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5780   } else {
5781     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5782     isrowb = *rowb;
5783     iscolb = *colb;
5784     PetscCall(PetscMalloc1(1, &bseq));
5785     bseq[0] = *B_seq;
5786   }
5787   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5788   *B_seq = bseq[0];
5789   PetscCall(PetscFree(bseq));
5790   if (!rowb) {
5791     PetscCall(ISDestroy(&isrowb));
5792   } else {
5793     *rowb = isrowb;
5794   }
5795   if (!colb) {
5796     PetscCall(ISDestroy(&iscolb));
5797   } else {
5798     *colb = iscolb;
5799   }
5800   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5801   PetscFunctionReturn(PETSC_SUCCESS);
5802 }
5803 
5804 /*
5805     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5806     of the OFF-DIAGONAL portion of local A
5807 
5808     Collective
5809 
5810    Input Parameters:
5811 +    A,B - the matrices in `MATMPIAIJ` format
5812 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5813 
5814    Output Parameter:
5815 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5816 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5817 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5818 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5819 
5820     Developer Note:
5821     This directly accesses information inside the VecScatter associated with the matrix-vector product
5822      for this matrix. This is not desirable..
5823 
5824     Level: developer
5825 
5826 */
5827 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5828 {
5829   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5830   Mat_SeqAIJ        *b_oth;
5831   VecScatter         ctx;
5832   MPI_Comm           comm;
5833   const PetscMPIInt *rprocs, *sprocs;
5834   const PetscInt    *srow, *rstarts, *sstarts;
5835   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5836   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5837   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5838   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5839   PetscMPIInt        size, tag, rank, nreqs;
5840 
5841   PetscFunctionBegin;
5842   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5843   PetscCallMPI(MPI_Comm_size(comm, &size));
5844 
5845   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5846              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5847   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5848   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5849 
5850   if (size == 1) {
5851     startsj_s = NULL;
5852     bufa_ptr  = NULL;
5853     *B_oth    = NULL;
5854     PetscFunctionReturn(PETSC_SUCCESS);
5855   }
5856 
5857   ctx = a->Mvctx;
5858   tag = ((PetscObject)ctx)->tag;
5859 
5860   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5861   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5862   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5863   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5864   PetscCall(PetscMalloc1(nreqs, &reqs));
5865   rwaits = reqs;
5866   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5867 
5868   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5869   if (scall == MAT_INITIAL_MATRIX) {
5870     /* i-array */
5871     /*  post receives */
5872     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5873     for (i = 0; i < nrecvs; i++) {
5874       rowlen = rvalues + rstarts[i] * rbs;
5875       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5876       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5877     }
5878 
5879     /* pack the outgoing message */
5880     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5881 
5882     sstartsj[0] = 0;
5883     rstartsj[0] = 0;
5884     len         = 0; /* total length of j or a array to be sent */
5885     if (nsends) {
5886       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5887       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5888     }
5889     for (i = 0; i < nsends; i++) {
5890       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5891       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5892       for (j = 0; j < nrows; j++) {
5893         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5894         for (l = 0; l < sbs; l++) {
5895           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5896 
5897           rowlen[j * sbs + l] = ncols;
5898 
5899           len += ncols;
5900           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5901         }
5902         k++;
5903       }
5904       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5905 
5906       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5907     }
5908     /* recvs and sends of i-array are completed */
5909     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5910     PetscCall(PetscFree(svalues));
5911 
5912     /* allocate buffers for sending j and a arrays */
5913     PetscCall(PetscMalloc1(len + 1, &bufj));
5914     PetscCall(PetscMalloc1(len + 1, &bufa));
5915 
5916     /* create i-array of B_oth */
5917     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5918 
5919     b_othi[0] = 0;
5920     len       = 0; /* total length of j or a array to be received */
5921     k         = 0;
5922     for (i = 0; i < nrecvs; i++) {
5923       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5924       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5925       for (j = 0; j < nrows; j++) {
5926         b_othi[k + 1] = b_othi[k] + rowlen[j];
5927         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5928         k++;
5929       }
5930       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5931     }
5932     PetscCall(PetscFree(rvalues));
5933 
5934     /* allocate space for j and a arrays of B_oth */
5935     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5936     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5937 
5938     /* j-array */
5939     /*  post receives of j-array */
5940     for (i = 0; i < nrecvs; i++) {
5941       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5942       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5943     }
5944 
5945     /* pack the outgoing message j-array */
5946     if (nsends) k = sstarts[0];
5947     for (i = 0; i < nsends; i++) {
5948       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5949       bufJ  = bufj + sstartsj[i];
5950       for (j = 0; j < nrows; j++) {
5951         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5952         for (ll = 0; ll < sbs; ll++) {
5953           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5954           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5955           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5956         }
5957       }
5958       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5959     }
5960 
5961     /* recvs and sends of j-array are completed */
5962     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5963   } else if (scall == MAT_REUSE_MATRIX) {
5964     sstartsj = *startsj_s;
5965     rstartsj = *startsj_r;
5966     bufa     = *bufa_ptr;
5967     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5968     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5969   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5970 
5971   /* a-array */
5972   /*  post receives of a-array */
5973   for (i = 0; i < nrecvs; i++) {
5974     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5975     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5976   }
5977 
5978   /* pack the outgoing message a-array */
5979   if (nsends) k = sstarts[0];
5980   for (i = 0; i < nsends; i++) {
5981     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5982     bufA  = bufa + sstartsj[i];
5983     for (j = 0; j < nrows; j++) {
5984       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5985       for (ll = 0; ll < sbs; ll++) {
5986         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5987         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5988         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5989       }
5990     }
5991     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5992   }
5993   /* recvs and sends of a-array are completed */
5994   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5995   PetscCall(PetscFree(reqs));
5996 
5997   if (scall == MAT_INITIAL_MATRIX) {
5998     /* put together the new matrix */
5999     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6000 
6001     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6002     /* Since these are PETSc arrays, change flags to free them as necessary. */
6003     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6004     b_oth->free_a  = PETSC_TRUE;
6005     b_oth->free_ij = PETSC_TRUE;
6006     b_oth->nonew   = 0;
6007 
6008     PetscCall(PetscFree(bufj));
6009     if (!startsj_s || !bufa_ptr) {
6010       PetscCall(PetscFree2(sstartsj, rstartsj));
6011       PetscCall(PetscFree(bufa_ptr));
6012     } else {
6013       *startsj_s = sstartsj;
6014       *startsj_r = rstartsj;
6015       *bufa_ptr  = bufa;
6016     }
6017   } else if (scall == MAT_REUSE_MATRIX) {
6018     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6019   }
6020 
6021   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6022   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6023   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6024   PetscFunctionReturn(PETSC_SUCCESS);
6025 }
6026 
6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6030 #if defined(PETSC_HAVE_MKL_SPARSE)
6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6032 #endif
6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6035 #if defined(PETSC_HAVE_ELEMENTAL)
6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6037 #endif
6038 #if defined(PETSC_HAVE_SCALAPACK)
6039 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6040 #endif
6041 #if defined(PETSC_HAVE_HYPRE)
6042 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6043 #endif
6044 #if defined(PETSC_HAVE_CUDA)
6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6046 #endif
6047 #if defined(PETSC_HAVE_HIP)
6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6049 #endif
6050 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6052 #endif
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6054 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6055 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6056 
6057 /*
6058     Computes (B'*A')' since computing B*A directly is untenable
6059 
6060                n                       p                          p
6061         [             ]       [             ]         [                 ]
6062       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6063         [             ]       [             ]         [                 ]
6064 
6065 */
6066 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6067 {
6068   Mat At, Bt, Ct;
6069 
6070   PetscFunctionBegin;
6071   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6072   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6073   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6074   PetscCall(MatDestroy(&At));
6075   PetscCall(MatDestroy(&Bt));
6076   PetscCall(MatTransposeSetPrecursor(Ct, C));
6077   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6078   PetscCall(MatDestroy(&Ct));
6079   PetscFunctionReturn(PETSC_SUCCESS);
6080 }
6081 
6082 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6083 {
6084   PetscBool cisdense;
6085 
6086   PetscFunctionBegin;
6087   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6088   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6089   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6090   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6091   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6092   PetscCall(MatSetUp(C));
6093 
6094   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6095   PetscFunctionReturn(PETSC_SUCCESS);
6096 }
6097 
6098 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6099 {
6100   Mat_Product *product = C->product;
6101   Mat          A = product->A, B = product->B;
6102 
6103   PetscFunctionBegin;
6104   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6105              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6106   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6107   C->ops->productsymbolic = MatProductSymbolic_AB;
6108   PetscFunctionReturn(PETSC_SUCCESS);
6109 }
6110 
6111 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6112 {
6113   Mat_Product *product = C->product;
6114 
6115   PetscFunctionBegin;
6116   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6117   PetscFunctionReturn(PETSC_SUCCESS);
6118 }
6119 
6120 /*
6121    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6122 
6123   Input Parameters:
6124 
6125     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6126     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6127 
6128     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6129 
6130     For Set1, j1[] contains column indices of the nonzeros.
6131     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6133     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6134 
6135     Similar for Set2.
6136 
6137     This routine merges the two sets of nonzeros row by row and removes repeats.
6138 
6139   Output Parameters: (memory is allocated by the caller)
6140 
6141     i[],j[]: the CSR of the merged matrix, which has m rows.
6142     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6143     imap2[]: similar to imap1[], but for Set2.
6144     Note we order nonzeros row-by-row and from left to right.
6145 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-cursor merge of the sorted column lists of row r; b1/b2 always point
       at the first occurrence of the current unique nonzero of each set */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero (skip repeats) */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero (skip repeats) */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Set1's nonzero comes first in column order */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Set2's nonzero comes first */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of these loops runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: row r of the merged matrix ends at position t in j[] */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6203 
6204 /*
6205   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6206 
6207   Input Parameters:
6208     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6209     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6210       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6211 
6212       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6213       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6214 
6215   Output Parameters:
6216     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6217     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6218       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6219       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6220 
6221     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6222       Atot: number of entries belonging to the diagonal block.
6223       Annz: number of unique nonzeros belonging to the diagonal block.
6224       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6225         repeats (i.e., same 'i,j' pair).
6226       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6227         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6228 
6229       Atot: number of entries belonging to the diagonal block
6230       Annz: number of unique nonzeros belonging to the diagonal block.
6231 
6232     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6233 
6234     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6235 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows (i[] is sorted, so ignorable entries are all at the front) */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1], so a single sort puts
       diagonal-block entries before off-diagonal ones */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sort this row's column indices, dragging perm[] along so original entry order is recoverable */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row (no index shift to revert here) */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* advance to the next row's first entry */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* p - q = number of repeats of this unique entry */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6347 
6348 /*
6349   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6350 
6351   Input Parameters:
6352     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6353     nnz:  number of unique nonzeros in the merged matrix
6354     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6355     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6356 
6357   Output Parameter: (memory is allocated by the caller)
6358     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6359 
6360   Example:
6361     nnz1 = 4
6362     nnz  = 6
6363     imap = [1,3,4,5]
6364     jmap = [0,3,5,6,7]
6365    then,
6366     jmap_new = [0,0,3,3,5,6,7]
6367 */
6368 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6369 {
6370   PetscCount k, p;
6371 
6372   PetscFunctionBegin;
6373   jmap_new[0] = 0;
6374   p           = nnz;                /* p loops over jmap_new[] backwards */
6375   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6376     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6377   }
6378   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6379   PetscFunctionReturn(PETSC_SUCCESS);
6380 }
6381 
6382 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6383 {
6384   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6385 
6386   PetscFunctionBegin;
6387   PetscCall(PetscSFDestroy(&coo->sf));
6388   PetscCall(PetscFree(coo->Aperm1));
6389   PetscCall(PetscFree(coo->Bperm1));
6390   PetscCall(PetscFree(coo->Ajmap1));
6391   PetscCall(PetscFree(coo->Bjmap1));
6392   PetscCall(PetscFree(coo->Aimap2));
6393   PetscCall(PetscFree(coo->Bimap2));
6394   PetscCall(PetscFree(coo->Aperm2));
6395   PetscCall(PetscFree(coo->Bperm2));
6396   PetscCall(PetscFree(coo->Ajmap2));
6397   PetscCall(PetscFree(coo->Bjmap2));
6398   PetscCall(PetscFree(coo->Cperm1));
6399   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6400   PetscCall(PetscFree(coo));
6401   PetscFunctionReturn(PETSC_SUCCESS);
6402 }
6403 
6404 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6405 {
6406   MPI_Comm             comm;
6407   PetscMPIInt          rank, size;
6408   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6409   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6410   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6411   PetscContainer       container;
6412   MatCOOStruct_MPIAIJ *coo;
6413 
6414   PetscFunctionBegin;
6415   PetscCall(PetscFree(mpiaij->garray));
6416   PetscCall(VecDestroy(&mpiaij->lvec));
6417 #if defined(PETSC_USE_CTABLE)
6418   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6419 #else
6420   PetscCall(PetscFree(mpiaij->colmap));
6421 #endif
6422   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6423   mat->assembled     = PETSC_FALSE;
6424   mat->was_assembled = PETSC_FALSE;
6425 
6426   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6427   PetscCallMPI(MPI_Comm_size(comm, &size));
6428   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6429   PetscCall(PetscLayoutSetUp(mat->rmap));
6430   PetscCall(PetscLayoutSetUp(mat->cmap));
6431   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6432   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6433   PetscCall(MatGetLocalSize(mat, &m, &n));
6434   PetscCall(MatGetSize(mat, &M, &N));
6435 
6436   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6437   /* entries come first, then local rows, then remote rows.                     */
6438   PetscCount n1 = coo_n, *perm1;
6439   PetscInt  *i1 = coo_i, *j1 = coo_j;
6440 
6441   PetscCall(PetscMalloc1(n1, &perm1));
6442   for (k = 0; k < n1; k++) perm1[k] = k;
6443 
6444   /* Manipulate indices so that entries with negative row or col indices will have smallest
6445      row indices, local entries will have greater but negative row indices, and remote entries
6446      will have positive row indices.
6447   */
6448   for (k = 0; k < n1; k++) {
6449     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6450     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6451     else {
6452       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6453       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6454     }
6455   }
6456 
6457   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6458   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6459 
6460   /* Advance k to the first entry we need to take care of */
6461   for (k = 0; k < n1; k++)
6462     if (i1[k] > PETSC_MIN_INT) break;
6463   PetscInt i1start = k;
6464 
6465   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6466   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6467 
6468   /*           Send remote rows to their owner                                  */
6469   /* Find which rows should be sent to which remote ranks*/
6470   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6471   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6472   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6473   const PetscInt *ranges;
6474   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6475 
6476   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6477   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6478   for (k = rem; k < n1;) {
6479     PetscMPIInt owner;
6480     PetscInt    firstRow, lastRow;
6481 
6482     /* Locate a row range */
6483     firstRow = i1[k]; /* first row of this owner */
6484     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6485     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6486 
6487     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6488     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6489 
6490     /* All entries in [k,p) belong to this remote owner */
6491     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6492       PetscMPIInt *sendto2;
6493       PetscInt    *nentries2;
6494       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6495 
6496       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6497       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6498       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6499       PetscCall(PetscFree2(sendto, nentries2));
6500       sendto   = sendto2;
6501       nentries = nentries2;
6502       maxNsend = maxNsend2;
6503     }
6504     sendto[nsend]   = owner;
6505     nentries[nsend] = p - k;
6506     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6507     nsend++;
6508     k = p;
6509   }
6510 
6511   /* Build 1st SF to know offsets on remote to send data */
6512   PetscSF      sf1;
6513   PetscInt     nroots = 1, nroots2 = 0;
6514   PetscInt     nleaves = nsend, nleaves2 = 0;
6515   PetscInt    *offsets;
6516   PetscSFNode *iremote;
6517 
6518   PetscCall(PetscSFCreate(comm, &sf1));
6519   PetscCall(PetscMalloc1(nsend, &iremote));
6520   PetscCall(PetscMalloc1(nsend, &offsets));
6521   for (k = 0; k < nsend; k++) {
6522     iremote[k].rank  = sendto[k];
6523     iremote[k].index = 0;
6524     nleaves2 += nentries[k];
6525     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6526   }
6527   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6528   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6529   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6530   PetscCall(PetscSFDestroy(&sf1));
6531   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6532 
6533   /* Build 2nd SF to send remote COOs to their owner */
6534   PetscSF sf2;
6535   nroots  = nroots2;
6536   nleaves = nleaves2;
6537   PetscCall(PetscSFCreate(comm, &sf2));
6538   PetscCall(PetscSFSetFromOptions(sf2));
6539   PetscCall(PetscMalloc1(nleaves, &iremote));
6540   p = 0;
6541   for (k = 0; k < nsend; k++) {
6542     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6543     for (q = 0; q < nentries[k]; q++, p++) {
6544       iremote[p].rank  = sendto[k];
6545       iremote[p].index = offsets[k] + q;
6546     }
6547   }
6548   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6549 
6550   /* Send the remote COOs to their owner */
6551   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6552   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6553   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6554   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6555   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6556   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6557   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6558 
6559   PetscCall(PetscFree(offsets));
6560   PetscCall(PetscFree2(sendto, nentries));
6561 
6562   /* Sort received COOs by row along with the permutation array     */
6563   for (k = 0; k < n2; k++) perm2[k] = k;
6564   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6565 
6566   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6567   PetscCount *Cperm1;
6568   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6569   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves));
6570 
6571   /* Support for HYPRE matrices, kind of a hack.
6572      Swap min column with diagonal so that diagonal values will go first */
6573   PetscBool   hypre;
6574   const char *name;
6575   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6576   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6577   if (hypre) {
6578     PetscInt *minj;
6579     PetscBT   hasdiag;
6580 
6581     PetscCall(PetscBTCreate(m, &hasdiag));
6582     PetscCall(PetscMalloc1(m, &minj));
6583     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6584     for (k = i1start; k < rem; k++) {
6585       if (j1[k] < cstart || j1[k] >= cend) continue;
6586       const PetscInt rindex = i1[k] - rstart;
6587       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6588       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6589     }
6590     for (k = 0; k < n2; k++) {
6591       if (j2[k] < cstart || j2[k] >= cend) continue;
6592       const PetscInt rindex = i2[k] - rstart;
6593       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6594       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6595     }
6596     for (k = i1start; k < rem; k++) {
6597       const PetscInt rindex = i1[k] - rstart;
6598       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6599       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6600       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6601     }
6602     for (k = 0; k < n2; k++) {
6603       const PetscInt rindex = i2[k] - rstart;
6604       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6605       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6606       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6607     }
6608     PetscCall(PetscBTDestroy(&hasdiag));
6609     PetscCall(PetscFree(minj));
6610   }
6611 
6612   /* Split local COOs and received COOs into diag/offdiag portions */
6613   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6614   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6615   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6616   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6617   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6618   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6619 
6620   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6621   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6622   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6623   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6624 
6625   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6626   PetscInt *Ai, *Bi;
6627   PetscInt *Aj, *Bj;
6628 
6629   PetscCall(PetscMalloc1(m + 1, &Ai));
6630   PetscCall(PetscMalloc1(m + 1, &Bi));
6631   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6632   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6633 
6634   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6635   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6636   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6637   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6638   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6639 
6640   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6641   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6642 
6643   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6644   /* expect nonzeros in A/B most likely have local contributing entries        */
6645   PetscInt    Annz = Ai[m];
6646   PetscInt    Bnnz = Bi[m];
6647   PetscCount *Ajmap1_new, *Bjmap1_new;
6648 
6649   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6650   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6651 
6652   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6653   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6654 
6655   PetscCall(PetscFree(Aimap1));
6656   PetscCall(PetscFree(Ajmap1));
6657   PetscCall(PetscFree(Bimap1));
6658   PetscCall(PetscFree(Bjmap1));
6659   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6660   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6661   PetscCall(PetscFree(perm1));
6662   PetscCall(PetscFree3(i2, j2, perm2));
6663 
6664   Ajmap1 = Ajmap1_new;
6665   Bjmap1 = Bjmap1_new;
6666 
6667   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6668   if (Annz < Annz1 + Annz2) {
6669     PetscInt *Aj_new;
6670     PetscCall(PetscMalloc1(Annz, &Aj_new));
6671     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6672     PetscCall(PetscFree(Aj));
6673     Aj = Aj_new;
6674   }
6675 
6676   if (Bnnz < Bnnz1 + Bnnz2) {
6677     PetscInt *Bj_new;
6678     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6679     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6680     PetscCall(PetscFree(Bj));
6681     Bj = Bj_new;
6682   }
6683 
6684   /* Create new submatrices for on-process and off-process coupling                  */
6685   PetscScalar     *Aa, *Ba;
6686   MatType          rtype;
6687   Mat_SeqAIJ      *a, *b;
6688   PetscObjectState state;
6689   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6690   PetscCall(PetscCalloc1(Bnnz, &Ba));
6691   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6692   if (cstart) {
6693     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6694   }
6695   PetscCall(MatDestroy(&mpiaij->A));
6696   PetscCall(MatDestroy(&mpiaij->B));
6697   PetscCall(MatGetRootType_Private(mat, &rtype));
6698   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6699   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6700   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6701   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6702   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6703   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6704 
6705   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6706   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6707   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6708   a->free_a = b->free_a = PETSC_TRUE;
6709   a->free_ij = b->free_ij = PETSC_TRUE;
6710 
6711   /* conversion must happen AFTER multiply setup */
6712   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6713   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6714   PetscCall(VecDestroy(&mpiaij->lvec));
6715   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6716 
6717   // Put the COO struct in a container and then attach that to the matrix
6718   PetscCall(PetscMalloc1(1, &coo));
6719   coo->n       = coo_n;
6720   coo->sf      = sf2;
6721   coo->sendlen = nleaves;
6722   coo->recvlen = nroots;
6723   coo->Annz    = Annz;
6724   coo->Bnnz    = Bnnz;
6725   coo->Annz2   = Annz2;
6726   coo->Bnnz2   = Bnnz2;
6727   coo->Atot1   = Atot1;
6728   coo->Atot2   = Atot2;
6729   coo->Btot1   = Btot1;
6730   coo->Btot2   = Btot2;
6731   coo->Ajmap1  = Ajmap1;
6732   coo->Aperm1  = Aperm1;
6733   coo->Bjmap1  = Bjmap1;
6734   coo->Bperm1  = Bperm1;
6735   coo->Aimap2  = Aimap2;
6736   coo->Ajmap2  = Ajmap2;
6737   coo->Aperm2  = Aperm2;
6738   coo->Bimap2  = Bimap2;
6739   coo->Bjmap2  = Bjmap2;
6740   coo->Bperm2  = Bperm2;
6741   coo->Cperm1  = Cperm1;
6742   // Allocate in preallocation. If not used, it has zero cost on host
6743   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6744   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6745   PetscCall(PetscContainerSetPointer(container, coo));
6746   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6747   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6748   PetscCall(PetscContainerDestroy(&container));
6749   PetscFunctionReturn(PETSC_SUCCESS);
6750 }
6751 
6752 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6753 {
6754   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6755   Mat                  A = mpiaij->A, B = mpiaij->B;
6756   PetscScalar         *Aa, *Ba;
6757   PetscScalar         *sendbuf, *recvbuf;
6758   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6759   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6760   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6761   const PetscCount    *Cperm1;
6762   PetscContainer       container;
6763   MatCOOStruct_MPIAIJ *coo;
6764 
6765   PetscFunctionBegin;
6766   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6767   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6768   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6769   sendbuf = coo->sendbuf;
6770   recvbuf = coo->recvbuf;
6771   Ajmap1  = coo->Ajmap1;
6772   Ajmap2  = coo->Ajmap2;
6773   Aimap2  = coo->Aimap2;
6774   Bjmap1  = coo->Bjmap1;
6775   Bjmap2  = coo->Bjmap2;
6776   Bimap2  = coo->Bimap2;
6777   Aperm1  = coo->Aperm1;
6778   Aperm2  = coo->Aperm2;
6779   Bperm1  = coo->Bperm1;
6780   Bperm2  = coo->Bperm2;
6781   Cperm1  = coo->Cperm1;
6782 
6783   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6784   PetscCall(MatSeqAIJGetArray(B, &Ba));
6785 
6786   /* Pack entries to be sent to remote */
6787   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6788 
6789   /* Send remote entries to their owner and overlap the communication with local computation */
6790   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6791   /* Add local entries to A and B */
6792   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6793     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6794     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6795     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6796   }
6797   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6798     PetscScalar sum = 0.0;
6799     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6800     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6801   }
6802   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6803 
6804   /* Add received remote entries to A and B */
6805   for (PetscCount i = 0; i < coo->Annz2; i++) {
6806     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6807   }
6808   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6809     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6810   }
6811   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6812   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6813   PetscFunctionReturn(PETSC_SUCCESS);
6814 }
6815 
6816 /*MC
6817    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6818 
6819    Options Database Keys:
6820 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6821 
6822    Level: beginner
6823 
6824    Notes:
6825    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6826     in this case the values associated with the rows and columns one passes in are set to zero
6827     in the matrix
6828 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6831 
6832 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6833 M*/
/* MatCreate_MPIAIJ - constructor for MATMPIAIJ, invoked when the matrix type is set.
   Allocates the Mat_MPIAIJ data, installs the MPIAIJ function table, and registers the
   type-specific methods and MatConvert()/MatProduct implementations by composed name. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* install the MPIAIJ method table wholesale */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map; built lazily during assembly */
  b->garray      = NULL; /* global indices of the off-diagonal (B) columns; built during assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific methods, queried elsewhere via PetscObjectQueryFunction() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Register MatConvert() implementations; the optional ones are guarded by configure-time flags */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6916 
6917 /*@C
6918   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6919   and "off-diagonal" part of the matrix in CSR format.
6920 
6921   Collective
6922 
6923   Input Parameters:
6924 + comm - MPI communicator
6925 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6926 . n    - This value should be the same as the local size used in creating the
6927        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6928        calculated if `N` is given) For square matrices `n` is almost always `m`.
6929 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6930 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6931 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6932 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6933 . a    - matrix values
6934 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6935 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6936 - oa   - matrix values
6937 
6938   Output Parameter:
6939 . mat - the matrix
6940 
6941   Level: advanced
6942 
6943   Notes:
6944   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6945   must free the arrays once the matrix has been destroyed and not before.
6946 
6947   The `i` and `j` indices are 0 based
6948 
6949   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6950 
6951   This sets local rows and cannot be used to set off-processor values.
6952 
6953   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6954   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6955   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6956   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6957   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6958   communication if it is known that only local entries will be set.
6959 
6960 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6961           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6962 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Basic sanity checks on the user-provided sizes and CSR row pointers */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* Mark preallocated since the CSR arrays below supply the structure directly */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user arrays (NOT copied; caller keeps ownership and must outlive the matrix):
     A = "diagonal" block with local column indices, B = "off-diagonal" block with global columns */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Assemble with off-process communication disabled (only local rows may be set here) */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  /* The wrapped arrays fix the sparsity pattern, so error on any new nonzero location */
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6991 
/* Per-product data for the "backend" (device-friendly) MPIAIJ MatProduct algorithms.
   The global product C is assembled from up to MAX_NUMBER_INTERMEDIATE sequential
   sub-products mp[] whose values are pushed into C via COO assembly; a PetscSF
   handles values that must be inserted on other processes. */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;
7022 
7023 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7024 {
7025   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7026   PetscInt             i;
7027 
7028   PetscFunctionBegin;
7029   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7030   PetscCall(PetscFree(mmdata->bufa));
7031   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7032   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7033   PetscCall(MatDestroy(&mmdata->P_oth));
7034   PetscCall(MatDestroy(&mmdata->Bloc));
7035   PetscCall(PetscSFDestroy(&mmdata->sf));
7036   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7037   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7038   PetscCall(PetscFree(mmdata->own[0]));
7039   PetscCall(PetscFree(mmdata->own));
7040   PetscCall(PetscFree(mmdata->off[0]));
7041   PetscCall(PetscFree(mmdata->off));
7042   PetscCall(PetscFree(mmdata));
7043   PetscFunctionReturn(PETSC_SUCCESS);
7044 }
7045 
7046 /* Copy selected n entries with indices in idx[] of A to v[].
7047    If idx is NULL, copy the whole data array of A to v[]
7048  */
7049 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7050 {
7051   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7052 
7053   PetscFunctionBegin;
7054   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7055   if (f) {
7056     PetscCall((*f)(A, n, idx, v));
7057   } else {
7058     const PetscScalar *vv;
7059 
7060     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7061     if (n && idx) {
7062       PetscScalar    *w  = v;
7063       const PetscInt *oi = idx;
7064       PetscInt        j;
7065 
7066       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7067     } else {
7068       PetscCall(PetscArraycpy(v, vv, n));
7069     }
7070     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7071   }
7072   PetscFunctionReturn(PETSC_SUCCESS);
7073 }
7074 
/* Numeric phase matching MatProductSymbolic_MPIAIJBACKEND(): refresh the temporary
   operands if needed, run the numeric phase of each intermediate sequential product,
   gather their values into the COO buffers laid out by the symbolic phase, scatter
   off-process contributions through mmdata->sf, and insert everything into C. */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process send buffer) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* symbolic-phase values can be reused at most once; later calls always update */

  /* numeric phase of each intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* pack values of each (non-temporary) intermediate product into the COO buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) { /* mp[i] has entries destined for other processes */
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else { /* fully local: copy mp[i]'s whole value array */
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7119 
7120 /* Support for Pt * A, A * P, or Pt * A * P */
7121 #define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase of C = A*P, P^t*A, or P^t*A*P for MPIAIJ (and device backend) matrices.
   Builds up to MAX_NUMBER_INTERMEDIATE sequential sub-products mp[], records how the
   rows/columns of each map to global indices of C (rmapt/cmapt, rmapa/cmapa), and
   precomputes the COO (i,j) pattern plus the PetscSF used by the numeric phase to
   insert values (including off-process contributions for AtB/PtAP). */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* symmetric A lets AtB be computed as the cheaper AB */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine C's sizes and whether values must be scattered to other processes */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: every product row is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  /* a->A/a->B are the diagonal/off-diagonal sequential blocks of the MPIAIJ operands */
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* create the intermediate sequential products and record their row/col maps */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth is only an input to the next product */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7616 
7617 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7618 {
7619   Mat_Product *product = mat->product;
7620 #if defined(PETSC_HAVE_DEVICE)
7621   PetscBool match  = PETSC_FALSE;
7622   PetscBool usecpu = PETSC_FALSE;
7623 #else
7624   PetscBool match = PETSC_TRUE;
7625 #endif
7626 
7627   PetscFunctionBegin;
7628   MatCheckProduct(mat, 1);
7629 #if defined(PETSC_HAVE_DEVICE)
7630   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7631   if (match) { /* we can always fallback to the CPU if requested */
7632     switch (product->type) {
7633     case MATPRODUCT_AB:
7634       if (product->api_user) {
7635         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7636         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7637         PetscOptionsEnd();
7638       } else {
7639         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7640         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7641         PetscOptionsEnd();
7642       }
7643       break;
7644     case MATPRODUCT_AtB:
7645       if (product->api_user) {
7646         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7647         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7648         PetscOptionsEnd();
7649       } else {
7650         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7651         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7652         PetscOptionsEnd();
7653       }
7654       break;
7655     case MATPRODUCT_PtAP:
7656       if (product->api_user) {
7657         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7658         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7659         PetscOptionsEnd();
7660       } else {
7661         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7662         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7663         PetscOptionsEnd();
7664       }
7665       break;
7666     default:
7667       break;
7668     }
7669     match = (PetscBool)!usecpu;
7670   }
7671 #endif
7672   if (match) {
7673     switch (product->type) {
7674     case MATPRODUCT_AB:
7675     case MATPRODUCT_AtB:
7676     case MATPRODUCT_PtAP:
7677       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7678       break;
7679     default:
7680       break;
7681     }
7682   }
7683   /* fallback to MPIAIJ ops */
7684   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7685   PetscFunctionReturn(PETSC_SUCCESS);
7686 }
7687 
7688 /*
7689    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7690 
7691    n - the number of block indices in cc[]
7692    cc - the block indices (must be large enough to contain the indices)
7693 */
7694 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7695 {
7696   PetscInt        cnt = -1, nidx, j;
7697   const PetscInt *idx;
7698 
7699   PetscFunctionBegin;
7700   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7701   if (nidx) {
7702     cnt     = 0;
7703     cc[cnt] = idx[0] / bs;
7704     for (j = 1; j < nidx; j++) {
7705       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7706     }
7707   }
7708   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7709   *n = cnt + 1;
7710   PetscFunctionReturn(PETSC_SUCCESS);
7711 }
7712 
7713 /*
7714     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7715 
7716     ncollapsed - the number of block indices
7717     collapsed - the block indices (must be large enough to contain the indices)
7718 */
7719 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7720 {
7721   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7722 
7723   PetscFunctionBegin;
7724   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7725   for (i = start + 1; i < start + bs; i++) {
7726     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7727     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7728     cprevtmp = cprev;
7729     cprev    = merged;
7730     merged   = cprevtmp;
7731   }
7732   *ncollapsed = nprev;
7733   if (collapsed) *collapsed = cprev;
7734   PetscFunctionReturn(PETSC_SUCCESS);
7735 }
7736 
7737 /*
7738  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7739 
7740  Input Parameter:
7741  . Amat - matrix
7742  - symmetrize - make the result symmetric
7743  + scale - scale with diagonal
7744 
7745  Output Parameter:
7746  . a_Gmat - output scalar graph >= 0
7747 
7748 */
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block rows */

  /* only (Seq|MPI)AIJ (and derived) types are supported */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* blocked case: collapse each bs x bs block of Amat to one scalar entry of Gmat */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    /* fast path: requires direct access to the (Seq)AIJ internals; for MPIAIJ the garray must exist
       (i.e. the matrix must be assembled). Falls back to the slow path below if blocks are not dense. */
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        /* split MPIAIJ into its diagonal (A) and off-diagonal (B) sequential parts */
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a             = d->A;
        b             = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      /* first pass: count block nonzeros per block row for preallocation, and verify every
         block is fully dense (same column pattern on all bs rows); otherwise bail to old_bs */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols1, *cols2;
        for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
          nnz[brow / bs] = nc2 / bs;
          if (nc2 % bs) ok = 0; /* row length not a multiple of bs => blocks cannot be dense */
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
            if (nc1 != nc2) ok = 0;
            else {
              for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
                if (cols1[jj] != cols2[jj]) ok = 0;             /* pattern differs between rows of the block */
                if (cols1[jj] % bs != jj % bs) ok = 0;          /* columns not block-aligned */
              }
            }
            PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
          }
          PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
            goto old_bs;
          }
        }
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      /* AA/AJ are scratch for one block row of scalar values / global block column indices */
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai               = aseq->i;
        n                = ai[brow + 1] - ai[brow];
        aj               = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          if (index_size == 0) {
            /* block value = sum of |a_ij| over the whole bs x bs block */
            for (int ii = 0; ii < bs; ii++) { // rows in block
              aa = aseq->a + ai[brow + ii] + k;
              for (int jj = 0; jj < bs; jj++) {         // columns in block
                val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
              }
            }
          } else {                                       // use (index,index) value if provided
            for (int iii = 0; iii < index_size; iii++) { // rows in block
              int ii = index[iii];
              aa     = aseq->a + ai[brow + ii] + k;
              for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
                int jj = index[jjj];
                val += PetscAbs(PetscRealPart(aa[jj]));
              }
            }
          }
          PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs; /* global block row index */
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray; /* garray maps local off-diag columns to global */
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first sweep: zero the accumulators and record global block column indices */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          /* second sweep: accumulate |a_ij| (or the (index,index) subset) per block */
          if (index_size == 0) {
            for (int ii = 0; ii < bs; ii++) { // rows in block
              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (int k = 0; k < ncols; k += bs) {
                for (int jj = 0; jj < bs; jj++) { // cols in block
                  PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          } else {                                       // use (index,index) value if provided
            for (int iii = 0; iii < index_size; iii++) { // rows in block
              int ii = index[iii];
              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (int k = 0; k < ncols; k += bs) {
                for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
                  int jj = index[jjj];
                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      /* slow path: one MatGetRow per scalar row, entries accumulated with ADD_VALUES */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
       */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz); /* workspace bound for MatCollapseRows */
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
         */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          /* clamp to the number of off-process block columns */
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          /* ADD_VALUES accumulates contributions from all rows/columns collapsed into this entry */
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: the graph is Amat itself; copy only if we will modify it below */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a             = d->A;
      b             = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      /* Gmat <- Gmat + Gmat^T */
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    /* symmetric diagonal scaling: G <- D^{-1/2} G D^{-1/2} */
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    /* drop small entries (threshold = filter) from the graph */
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
8020 
8021 /*
8022     Special version for direct calls from Fortran
8023 */
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
/* NOTE: both macros rely on a PetscErrorCode *_ierr parameter being in scope in the
   function that uses them, and they return (void) instead of returning an error code */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the Fortran-visible name per the compiler's name-mangling convention */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran-callable MatSetValues for MPIAIJ matrices.

  All arguments arrive as pointers (Fortran pass-by-reference); errors are reported
  through *_ierr via the redefined PetscCall/SETERRQ macros above instead of a return code.
  The body inlines the MPIAIJ MatSetValues logic: locally-owned entries go into the
  diagonal (A) or off-diagonal (B) sequential block via the
  MatSetValues_SeqAIJ_{A,B}_Private macros, off-process rows are stashed.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    /* NOTE: the MatSetValues_SeqAIJ_{A,B}_Private macros read/write these exact
       variable names (rp1/ap1/..., rp2/ap2/..., aa, ba, inserted) — do not rename */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are ignored */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up search state for both the A (diag) and B (off-diag) parts */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m]; /* column-oriented (Fortran) layout */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate global column to local B column */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--; /* colmap stores 1-based indices; 0 means "not present" */
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-diag column: disassemble so B can grow, then retry with the global index */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8164 
8165 /* Undefining these here since they were redefined from their original definition above! No
8166  * other PETSc functions should be defined past this point, as it is impossible to recover the
8167  * original definitions */
8168 #undef PetscCall
8169 #undef SETERRQ
8170