xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a3f1d042deeee8d591d0e166df91c7782e45ac59)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
/*
  MatGetRowIJ_MPIAIJ - Returns the row/column CSR index arrays of the merged local
  matrix (diagonal plus off-diagonal block) of an MPIAIJ matrix.

  The merged sequential matrix B is composed onto A under the key "MatGetRowIJ_MPIAIJ";
  composing increases its reference count, so the MatDestroy() below only drops this
  function's reference and B stays alive until MatRestoreRowIJ_MPIAIJ() removes the
  composition.
*/
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  /* delegate to the sequential implementation of the merged local matrix */
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
100 
/*
  MatRestoreRowIJ_MPIAIJ - Releases the CSR index arrays obtained with MatGetRowIJ_MPIAIJ().

  Retrieves the merged local matrix that MatGetRowIJ_MPIAIJ() composed onto A, restores its
  index arrays, then removes the composition (compose with NULL), which drops the last
  reference and destroys the temporary matrix.
*/
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
  enough exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
/*
  MatGetColumnReductions_MPIAIJ - Computes a per-column reduction over all stored entries:
  a norm (NORM_1, NORM_2, NORM_INFINITY) or a sum/mean of the real or imaginary parts.

  Collective; `reductions` must have room for the global number of columns and ends up
  identical on every process (combined with an Allreduce below).
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray; /* garray: local off-diagonal column -> global column */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;
  PetscMPIInt        in;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work)); /* one zero-initialized slot per global column */
  /* get/restore the read arrays without using them -- presumably to make sure the host-side
     a_aij->a / b_aij->a accessed below are up to date; TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* accumulate local contributions; diagonal-block columns are shifted by cmap->rstart,
     off-diagonal columns are mapped through garray */
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  PetscCall(PetscMPIIntCast(n, &in));
  /* combine the per-process partial results: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-process: square root for the 2-norm, divide by the global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
326 
/*
  MatFindOffBlockDiagonalEntries_MPIAIJ - Creates an IS with the global indices of the
  locally owned rows that contain an entry outside the block diagonal.

  A row qualifies if the sequential diagonal block reports an off-block-diagonal entry
  for it, or if the off-diagonal block has any nonzero in it; the two (local) row lists
  are merged, sorted, de-duplicated, and shifted to global numbering.
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); /* rows with off-block-diagonal entries inside the diagonal block */
  PetscCall(MatFindNonzeroRows(a->B, &gis));             /* rows with any entry in the off-diagonal block */
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate both lists, then sort and remove duplicates in place */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart; /* local -> global row numbering */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}
357 
358 /*
359   Local utility routine that creates a mapping from the global column
360 number to the local number in the off-diagonal part of the local
361 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array) but it is fast to access.
364 */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* number of off-diagonal columns present on this process */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table keyed by global column + 1 (1-based so a lookup result of 0 means "not present") */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* non-scalable variant: dense array over all global columns; entries hold local index + 1 (0 means "not present") */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
381 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts or adds one value at (row, col) of the diagonal (A)
  block. Operates on locals set up by the caller (rp1/ap1: column/value arrays of the row;
  nrow1/low1/high1/lastcol1: search state; aimax/ailen/nonew/ignorezeroentries: row metadata).
  (orow, ocol) are the global indices, used only in error messages. Uses a bisection-narrowed
  linear search and reallocates the row when a new nonzero must be inserted.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
426 
/*
  MatSetValues_SeqAIJ_B_Private - Same insertion logic as MatSetValues_SeqAIJ_A_Private(), but
  for the off-diagonal (B) block, operating on the rp2/ap2/nrow2/low2/high2/lastcol2 state.
  Unlike the A variant, the zero-value skip has no `row != col` test: the off-diagonal block
  never stores the global diagonal entry of a row.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
470 
/*
  MatSetValuesRow_MPIAIJ - Replaces all stored values of one (globally numbered, locally
  owned) row with the values in v[].

  v[] must supply exactly one value per stored nonzero of the row, ordered by global column:
  first the off-diagonal entries left of the diagonal block, then the diagonal-block entries,
  then the remaining off-diagonal entries. The sparsity pattern is not changed.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL)); /* diag = global index of the first locally owned row */
  row = row - diag;                                /* convert to local row index */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    /* off-diagonal columns are sorted; stop at the first one past the diagonal block */
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
508 
/*
  MatSetValues_MPIAIJ - Inserts or adds a logically dense block of values into an MPIAIJ matrix.

  Locally owned rows are written directly into the sequential diagonal (A) or off-diagonal (B)
  block via the MatSetValues_SeqAIJ_{A,B}_Private() macros; values for rows owned by other
  processes are placed in the stash and communicated during MatAssemblyBegin()/End().
  Negative row or column indices are silently skipped.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column lies in the diagonal block: store with local column index */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash the values for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
617 
618 /*
619     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
620     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
622 */
623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
624 {
625   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
626   Mat         A      = aij->A; /* diagonal part of the matrix */
627   Mat         B      = aij->B; /* off-diagonal part of the matrix */
628   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
629   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
630   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
631   PetscInt   *ailen = a->ilen, *aj = a->j;
632   PetscInt   *bilen = b->ilen, *bj = b->j;
633   PetscInt    am          = aij->A->rmap->n, j;
634   PetscInt    diag_so_far = 0, dnz;
635   PetscInt    offd_so_far = 0, onz;
636 
637   PetscFunctionBegin;
638   /* Iterate over all rows of the matrix */
639   for (j = 0; j < am; j++) {
640     dnz = onz = 0;
641     /*  Iterate over all non-zero columns of the current row */
642     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
643       /* If column is in the diagonal */
644       if (mat_j[col] >= cstart && mat_j[col] < cend) {
645         aj[diag_so_far++] = mat_j[col] - cstart;
646         dnz++;
647       } else { /* off-diagonal entries */
648         bj[offd_so_far++] = mat_j[col];
649         onz++;
650       }
651     }
652     ailen[j] = dnz;
653     bilen[j] = onz;
654   }
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 /*
659     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
660     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
662     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
663     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
664 */
665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
666 {
667   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
668   Mat          A    = aij->A; /* diagonal part of the matrix */
669   Mat          B    = aij->B; /* off-diagonal part of the matrix */
670   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
671   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
672   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
673   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
674   PetscInt    *ailen = a->ilen, *aj = a->j;
675   PetscInt    *bilen = b->ilen, *bj = b->j;
676   PetscInt     am          = aij->A->rmap->n, j;
677   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
678   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
679   PetscScalar *aa = a->a, *ba = b->a;
680 
681   PetscFunctionBegin;
682   /* Iterate over all rows of the matrix */
683   for (j = 0; j < am; j++) {
684     dnz_row = onz_row = 0;
685     rowstart_offd     = full_offd_i[j];
686     rowstart_diag     = full_diag_i[j];
687     /*  Iterate over all non-zero columns of the current row */
688     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
689       /* If column is in the diagonal */
690       if (mat_j[col] >= cstart && mat_j[col] < cend) {
691         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
692         aa[rowstart_diag + dnz_row] = mat_a[col];
693         dnz_row++;
694       } else { /* off-diagonal entries */
695         bj[rowstart_offd + onz_row] = mat_j[col];
696         ba[rowstart_offd + onz_row] = mat_a[col];
697         onz_row++;
698       }
699     }
700     ailen[j] = dnz_row;
701     bilen[j] = onz_row;
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
705 
/*
  Gets the dense logically two-dimensional array of values v[i*n + j] = mat(idxm[i], idxn[j]).

  Only rows owned by this process may be requested (negative row/column indices are skipped).
  Diagonal-block columns are read directly from aij->A; off-diagonal columns are translated
  through the colmap/garray compact numbering and read from aij->B. Entries not present in
  the stored nonzero pattern are returned as 0.0.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        /* column is inside the diagonal block: local index is a simple offset */
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* off-diagonal block: map the global column to B's compact column numbering */
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--; /* colmap stores indices shifted by one so that 0 can mean "absent" */
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* column not in the off-diagonal pattern -> value is implicitly zero */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
739 
740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
741 {
742   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
743   PetscInt    nstash, reallocs;
744 
745   PetscFunctionBegin;
746   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
747 
748   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
749   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
750   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
751   PetscFunctionReturn(PETSC_SUCCESS);
752 }
753 
/*
  Completes assembly of an MPIAIJ matrix: receives and inserts the off-process entries
  stashed by other ranks, assembles the diagonal (A) and off-diagonal (B) blocks,
  coordinates disassembly across ranks when the nonzero structure may have changed,
  and on first final assembly builds the scatter used for matrix-vector products.
  Collective over the matrix communicator.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* Drain all incoming stash messages; each carries (row, col, val) triples sorted by row */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build the column map and Mvctx scatter for MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached row-extraction workspace and diagonal are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
832 
833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
834 {
835   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
836 
837   PetscFunctionBegin;
838   PetscCall(MatZeroEntries(l->A));
839   PetscCall(MatZeroEntries(l->B));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
/*
  Zeros the given global rows (any rank may list any row; ownership is resolved
  internally), optionally placing `diag` on the diagonal of each zeroed row and
  fixing the right-hand side b so that x remains a solution for those rows.
  Collective over the matrix communicator.
*/
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i for every zeroed row so x still satisfies those equations */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the blocks' nonew flags; they are temporarily cleared below to permit new entries */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert the diagonal value explicitly; rows past the column range have no diagonal */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original nonew flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
913 
/*
  Zeros the given global rows AND the corresponding columns, optionally placing
  `diag` on the diagonal and adjusting b so that x remains a solution: for each
  eliminated column c, b_i -= A(i,c) * x_c before A(i,c) is zeroed.
  Row ownership is resolved with a star forest; the eliminated-column mask is
  communicated to ghost (off-diagonal) columns through the matrix scatter.
  Collective over the matrix communicator.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed; any nonnegative reduced value marks the row */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a 0/1 mask over ghost columns marking which ones were eliminated */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* bring the ghost values of x needed for the rhs correction */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with nonzeros are listed, via rindex */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column was eliminated: move its contribution to the rhs, then zero it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1031 
1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1033 {
1034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035   PetscInt    nt;
1036   VecScatter  Mvctx = a->Mvctx;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(VecGetLocalSize(xx, &nt));
1040   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1041   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->A, mult, xx, yy);
1043   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1044   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1045   PetscFunctionReturn(PETSC_SUCCESS);
1046 }
1047 
1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1049 {
1050   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1051 
1052   PetscFunctionBegin;
1053   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1058 {
1059   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1060   VecScatter  Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   /* do nondiagonal part */
1076   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1077   /* do local part */
1078   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1079   /* add partial results together */
1080   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1081   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
/*
  Tests whether Bmat equals Amat^T to within tol. First performs the cheap local
  test on the diagonal blocks (combined across ranks); only if that passes does
  it extract and compare the off-diagonal blocks via MatCreateSubMatrices.
  Collective over the matrix communicator.
*/
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  /* all ranks must agree before attempting the expensive test */
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global indices outside this rank's ownership range [first,last) */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* compare A(Me,Notme) against B(Notme,Me): these must be transposes of each other */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1124 
1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1126 {
1127   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1128 
1129   PetscFunctionBegin;
1130   /* do nondiagonal part */
1131   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1132   /* do local part */
1133   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1134   /* add partial results together */
1135   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1136   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1137   PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139 
1140 /*
1141   This only works correctly for square matrices where the subblock A->A is the
1142    diagonal block
1143 */
1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1150   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1151   PetscCall(MatGetDiagonal(a->A, v));
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(MatScale(a->A, aa));
1161   PetscCall(MatScale(a->B, aa));
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1166 {
1167   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1168   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1169   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1170   const PetscInt    *garray = aij->garray;
1171   const PetscScalar *aa, *ba;
1172   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1173   PetscInt64         nz, hnz;
1174   PetscInt          *rowlens;
1175   PetscInt          *colidxs;
1176   PetscScalar       *matvals;
1177   PetscMPIInt        rank;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscViewerSetUp(viewer));
1181 
1182   M  = mat->rmap->N;
1183   N  = mat->cmap->N;
1184   m  = mat->rmap->n;
1185   rs = mat->rmap->rstart;
1186   cs = mat->cmap->rstart;
1187   nz = A->nz + B->nz;
1188 
1189   /* write matrix header */
1190   header[0] = MAT_FILE_CLASSID;
1191   header[1] = M;
1192   header[2] = N;
1193   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1194   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1195   if (rank == 0) {
1196     if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX;
1197     else header[3] = (PetscInt)hnz;
1198   }
1199   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1200 
1201   /* fill in and store row lengths  */
1202   PetscCall(PetscMalloc1(m, &rowlens));
1203   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1204   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1205   PetscCall(PetscFree(rowlens));
1206 
1207   /* fill in and store column indices */
1208   PetscCall(PetscMalloc1(nz, &colidxs));
1209   for (cnt = 0, i = 0; i < m; i++) {
1210     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1211       if (garray[B->j[jb]] > cs) break;
1212       colidxs[cnt++] = garray[B->j[jb]];
1213     }
1214     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1215     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1216   }
1217   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1218   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1219   PetscCall(PetscFree(colidxs));
1220 
1221   /* fill in and store nonzero values */
1222   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1223   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1224   PetscCall(PetscMalloc1(nz, &matvals));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       matvals[cnt++] = ba[jb];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1231     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1232   }
1233   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1235   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1236   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1237   PetscCall(PetscFree(matvals));
1238 
1239   /* write block size option to the viewer's .info file */
1240   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 #include <petscdraw.h>
/*
  Common viewer implementation for ASCII, draw, socket, and binary viewers.
  ASCII info formats and (on one rank) binary output are handled directly;
  every other case gathers the whole matrix onto rank 0 and views it there.
  Collective over the matrix communicator.
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary: local sizes, nonzeros, memory, I-node usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* single rank: the diagonal block IS the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns; every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1369 
1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1371 {
1372   PetscBool iascii, isdraw, issocket, isbinary;
1373 
1374   PetscFunctionBegin;
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1377   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1378   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1379   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1380   PetscFunctionReturn(PETSC_SUCCESS);
1381 }
1382 
/*
  MatSOR_MPIAIJ - "local" SOR for a parallel AIJ matrix.

  Only process-local sweeps are supported: each outer iteration scatters the current
  solution xx into the ghost vector lvec, folds the off-diagonal coupling B*x into the
  right-hand side (bb1 = bb - B*x), and then runs the sequential SOR kernel of the
  diagonal block A. Truly parallel (global ordering) SOR is not supported and errors out.
*/
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector for bb - B*x; lazily allocated below */
  PetscBool   hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER only touches the local diagonal block; no communication needed */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed unless a single iteration starts from a zero initial guess (then bb can
     be used directly); note `~flag & SOR_ZERO_INITIAL_GUESS` tests that the bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero guess the first iteration has no off-process contribution, so do it directly */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* refresh ghost values of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    /* Eisenstat's trick: one backward sweep, then combine with a diagonal correction and
       a forward sweep; xx1 accumulates the forward-sweep result added onto xx at the end */
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* cache the matrix diagonal the first time it is needed */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    /* use the specialized diagonal-block product when the implementation provides one */
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*xx */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    /* add off-process coupling: bb1 += B * lvec */
    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any (zero-pivot style) factorization error detected by the local kernel */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1479 
/*
  MatPermute_MPIAIJ - build B, a copy of A with rows and columns permuted by rowp/colp.

  Strategy: use star forests (PetscSF) to invert the row and column permutations so each
  rank learns the destination of its own rows/columns, count the new diagonal/off-diagonal
  nonzeros per destination row for preallocation, then insert the values with MatSetValues.
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* never set in this function; guard at the end is kept for safety */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is reused for both row and column phases, hence max(m,n) entries */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go:
     leaf i targets root rwant[i]; reducing each row's global index gives
     rdest[i] = destination (global) row of local row i */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go (same trick) */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols (global indices of ghost columns of the off-diagonal block)
     should go: broadcast cdest from the owning ranks to the ghosting ranks */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count, per local source row, how many entries land in the diagonal (dnnz) vs
     off-diagonal (onnz) block of the destination row's owner */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Ship the per-row counts to the ranks that own the destination rows (tdnnz/tonnz) */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1585 
1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1587 {
1588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1589 
1590   PetscFunctionBegin;
1591   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1592   if (ghosts) *ghosts = aij->garray;
1593   PetscFunctionReturn(PETSC_SUCCESS);
1594 }
1595 
1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1597 {
1598   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1599   Mat            A = mat->A, B = mat->B;
1600   PetscLogDouble isend[5], irecv[5];
1601 
1602   PetscFunctionBegin;
1603   info->block_size = 1.0;
1604   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1605 
1606   isend[0] = info->nz_used;
1607   isend[1] = info->nz_allocated;
1608   isend[2] = info->nz_unneeded;
1609   isend[3] = info->memory;
1610   isend[4] = info->mallocs;
1611 
1612   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1613 
1614   isend[0] += info->nz_used;
1615   isend[1] += info->nz_allocated;
1616   isend[2] += info->nz_unneeded;
1617   isend[3] += info->memory;
1618   isend[4] += info->mallocs;
1619   if (flag == MAT_LOCAL) {
1620     info->nz_used      = isend[0];
1621     info->nz_allocated = isend[1];
1622     info->nz_unneeded  = isend[2];
1623     info->memory       = isend[3];
1624     info->mallocs      = isend[4];
1625   } else if (flag == MAT_GLOBAL_MAX) {
1626     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1627 
1628     info->nz_used      = irecv[0];
1629     info->nz_allocated = irecv[1];
1630     info->nz_unneeded  = irecv[2];
1631     info->memory       = irecv[3];
1632     info->mallocs      = irecv[4];
1633   } else if (flag == MAT_GLOBAL_SUM) {
1634     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1635 
1636     info->nz_used      = irecv[0];
1637     info->nz_allocated = irecv[1];
1638     info->nz_unneeded  = irecv[2];
1639     info->memory       = irecv[3];
1640     info->mallocs      = irecv[4];
1641   }
1642   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1643   info->fill_ratio_needed = 0;
1644   info->factor_mallocs    = 0;
1645   PetscFunctionReturn(PETSC_SUCCESS);
1646 }
1647 
/*
  MatSetOption_MPIAIJ - dispatch a matrix option to the appropriate place:
  forwarded to both sequential blocks, stored in this object's data, noted as
  ignored, or (for symmetry flags) handled entirely by the caller MatSetOption().
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that simply propagate to both the diagonal and off-diagonal blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  /* recorded locally (for MatSetValues orientation) and also propagated */
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  /* disables stashing of off-process entries during assembly */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1701 
/*
  MatGetRow_MPIAIJ - return one locally-owned row with global column indices.

  The row is assembled by merging the diagonal-block (A) entries, whose global columns
  are cstart + local column, with the off-diagonal-block (B) entries, whose global
  columns come from the garray map. Assuming both blocks store sorted columns, the B
  entries split into a prefix with global column < cstart and a suffix > diagonal range,
  so the merged row is: B-prefix, all of A, B-suffix. Only one row may be "active" at a
  time; MatRestoreRow_MPIAIJ() must be called before the next MatGetRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  /* lazily allocate merge buffers sized by the longest local row (sum of A and B widths) */
  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* pass NULL to the block getrow for whatever the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL; /* B columns are still needed to order the values */
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* local B column -> global column */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1785 
1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1787 {
1788   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1789 
1790   PetscFunctionBegin;
1791   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1792   aij->getrowactive = PETSC_FALSE;
1793   PetscFunctionReturn(PETSC_SUCCESS);
1794 }
1795 
/*
  MatNorm_MPIAIJ - Frobenius, 1- and infinity-norms of a parallel AIJ matrix.

  Each rank accumulates contributions from its diagonal (A) and off-diagonal (B)
  blocks and the results are combined with an Allreduce. The 1-norm path allocates
  two arrays of global-column length on every rank, so it is O(N) memory per rank.
*/
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;
  PetscMPIInt      iN;

  PetscFunctionBegin;
  /* uniprocessor case: defer entirely to the sequential implementation */
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| into a global-length array of column sums; A's columns are
         shifted by cstart, B's are mapped through garray */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
      PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across ranks, so a local row-sum max + global MAX suffices */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1875 
/*
  MatTranspose_MPIAIJ - transpose a parallel AIJ matrix.

  The diagonal block is transposed locally (fast path, no MatSetValues); the
  off-diagonal block's entries are inserted into the result with rows/columns
  swapped via MatSetValues, which routes them to the owning ranks during assembly.
  Preallocation for the initial-matrix case is computed by counting column
  occurrences locally and reducing the off-diagonal counts over a PetscSF.
*/
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  /* *matout == A is the in-place case routed here with MAT_INITIAL-like setup */
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* transposed sizes: local rows/cols swap, global M and N swap */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part: insert each source row as a column of the result
     (MatSetValues with ncol rows and 1 column performs the transposition) */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; /* global column indices */
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose (presumably MAT_INPLACE_MATRIX): replace A's innards with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1969 
1970 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1971 {
1972   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1973   Mat         a = aij->A, b = aij->B;
1974   PetscInt    s1, s2, s3;
1975 
1976   PetscFunctionBegin;
1977   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1978   if (rr) {
1979     PetscCall(VecGetLocalSize(rr, &s1));
1980     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1981     /* Overlap communication with computation. */
1982     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1983   }
1984   if (ll) {
1985     PetscCall(VecGetLocalSize(ll, &s1));
1986     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1987     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1988   }
1989   /* scale  the diagonal block */
1990   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1991 
1992   if (rr) {
1993     /* Do a scatter end and then right scale the off-diagonal block */
1994     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1995     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1996   }
1997   PetscFunctionReturn(PETSC_SUCCESS);
1998 }
1999 
2000 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2001 {
2002   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2003 
2004   PetscFunctionBegin;
2005   PetscCall(MatSetUnfactored(a->A));
2006   PetscFunctionReturn(PETSC_SUCCESS);
2007 }
2008 
2009 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2010 {
2011   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2012   Mat         a, b, c, d;
2013   PetscBool   flg;
2014 
2015   PetscFunctionBegin;
2016   a = matA->A;
2017   b = matA->B;
2018   c = matB->A;
2019   d = matB->B;
2020 
2021   PetscCall(MatEqual(a, c, &flg));
2022   if (flg) PetscCall(MatEqual(b, d, &flg));
2023   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2024   PetscFunctionReturn(PETSC_SUCCESS);
2025 }
2026 
2027 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2028 {
2029   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2030   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2031 
2032   PetscFunctionBegin;
2033   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2034   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2035     /* because of the column compression in the off-processor part of the matrix a->B,
2036        the number of columns in a->B and b->B may be different, hence we cannot call
2037        the MatCopy() directly on the two parts. If need be, we can provide a more
2038        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2039        then copying the submatrices */
2040     PetscCall(MatCopy_Basic(A, B, str));
2041   } else {
2042     PetscCall(MatCopy(a->A, b->A, str));
2043     PetscCall(MatCopy(a->B, b->B, str));
2044   }
2045   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2046   PetscFunctionReturn(PETSC_SUCCESS);
2047 }
2048 
2049 /*
2050    Computes the number of nonzeros per row needed for preallocation when X and Y
2051    have different nonzero structure.
2052 */
2053 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2054 {
2055   PetscInt i, j, k, nzx, nzy;
2056 
2057   PetscFunctionBegin;
2058   /* Set the number of nonzeros in the new matrix */
2059   for (i = 0; i < m; i++) {
2060     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2061     nzx    = xi[i + 1] - xi[i];
2062     nzy    = yi[i + 1] - yi[i];
2063     nnz[i] = 0;
2064     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2065       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2066       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2067       nnz[i]++;
2068     }
2069     for (; k < nzy; k++) nnz[i]++;
2070   }
2071   PetscFunctionReturn(PETSC_SUCCESS);
2072 }
2073 
2074 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2075 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2076 {
2077   PetscInt    m = Y->rmap->N;
2078   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2079   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2080 
2081   PetscFunctionBegin;
2082   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2083   PetscFunctionReturn(PETSC_SUCCESS);
2084 }
2085 
/*
  MatAXPY_MPIAIJ - Y = a*X + Y for parallel AIJ matrices.

  Three paths depending on the structural relation of X and Y:
  - SAME_NONZERO_PATTERN: block-wise AXPY, no new allocation;
  - SUBSET_NONZERO_PATTERN: generic basic implementation;
  - otherwise: build a fresh matrix B preallocated for the union pattern, do the
    AXPY into B, then replace Y's contents with B via MatHeaderMerge.
*/
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o; /* union-pattern row counts for the diag/off-diag blocks */

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    /* B inherits Y's name, layouts and type so the header merge below is seamless */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's innards with B's; B itself is consumed */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2116 
2117 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2118 
2119 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2120 {
2121   PetscFunctionBegin;
2122   if (PetscDefined(USE_COMPLEX)) {
2123     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2124 
2125     PetscCall(MatConjugate_SeqAIJ(aij->A));
2126     PetscCall(MatConjugate_SeqAIJ(aij->B));
2127   }
2128   PetscFunctionReturn(PETSC_SUCCESS);
2129 }
2130 
2131 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2132 {
2133   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2134 
2135   PetscFunctionBegin;
2136   PetscCall(MatRealPart(a->A));
2137   PetscCall(MatRealPart(a->B));
2138   PetscFunctionReturn(PETSC_SUCCESS);
2139 }
2140 
2141 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2144 
2145   PetscFunctionBegin;
2146   PetscCall(MatImaginaryPart(a->A));
2147   PetscCall(MatImaginaryPart(a->B));
2148   PetscFunctionReturn(PETSC_SUCCESS);
2149 }
2150 
2151 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2152 {
2153   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2154   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2155   PetscScalar       *va, *vv;
2156   Vec                vB, vA;
2157   const PetscScalar *vb;
2158 
2159   PetscFunctionBegin;
2160   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2161   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2162 
2163   PetscCall(VecGetArrayWrite(vA, &va));
2164   if (idx) {
2165     for (i = 0; i < m; i++) {
2166       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2167     }
2168   }
2169 
2170   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2171   PetscCall(PetscMalloc1(m, &idxb));
2172   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2173 
2174   PetscCall(VecGetArrayWrite(v, &vv));
2175   PetscCall(VecGetArrayRead(vB, &vb));
2176   for (i = 0; i < m; i++) {
2177     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2178       vv[i] = vb[i];
2179       if (idx) idx[i] = a->garray[idxb[i]];
2180     } else {
2181       vv[i] = va[i];
2182       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2183     }
2184   }
2185   PetscCall(VecRestoreArrayWrite(vA, &vv));
2186   PetscCall(VecRestoreArrayWrite(vA, &va));
2187   PetscCall(VecRestoreArrayRead(vB, &vb));
2188   PetscCall(PetscFree(idxb));
2189   PetscCall(VecDestroy(&vA));
2190   PetscCall(VecDestroy(&vB));
2191   PetscFunctionReturn(PETSC_SUCCESS);
2192 }
2193 
2194 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2195 {
2196   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2197   Vec         vB, vA;
2198 
2199   PetscFunctionBegin;
2200   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2201   PetscCall(MatGetRowSumAbs(a->A, vA));
2202   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2203   PetscCall(MatGetRowSumAbs(a->B, vB));
2204   PetscCall(VecAXPY(vA, 1.0, vB));
2205   PetscCall(VecDestroy(&vB));
2206   PetscCall(VecCopy(vA, v));
2207   PetscCall(VecDestroy(&vA));
2208   PetscFunctionReturn(PETSC_SUCCESS);
2209 }
2210 
/* Row-wise minimum of |entries| over the whole parallel matrix. Columns with no stored
   entry in the off-diagonal block count as implicit 0.0, so the result can be 0 even for
   rows with only nonzero stored values. idx[] (optional) returns the global column of the
   minimum; ties are broken toward the smaller column index. */
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything lives in the diagonal block; delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: every row is entirely implicit zeros here */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists and bounds the row minimum from above */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip the diagonal-block column range */
        }
      }
    }

    /* Scan the stored off-diagonal entries of this row for a smaller |value| */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Combine diagonal-block and off-diagonal-block minima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to the diagonal block */
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2326 
/* Row-wise minimum (by real part) over the whole parallel matrix. Columns with no stored
   entry in the off-diagonal block count as implicit 0.0. idx[] (optional) returns the
   global column of the minimum; ties are broken toward the smaller column index. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything lives in the diagonal block; delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: no entries, so the minimum is the identity for min */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists and bounds the row minimum from above */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip the diagonal-block column range */
        }
      }
    }

    /* Scan the stored off-diagonal entries of this row for a smaller value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Combine diagonal-block and off-diagonal-block minima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to the diagonal block */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2442 
/* Row-wise maximum (by real part) over the whole parallel matrix. Columns with no stored
   entry in the off-diagonal block count as implicit 0.0. idx[] (optional) returns the
   global column of the maximum; ties are broken toward the smaller column index. */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything lives in the diagonal block; delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: no entries, so the maximum is the identity for max */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip the diagonal-block column range */
        }
      }
    }

    /* Scan the stored off-diagonal entries of this row for a larger value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Combine diagonal-block and off-diagonal-block maxima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to the diagonal block */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2558 
2559 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2560 {
2561   Mat *dummy;
2562 
2563   PetscFunctionBegin;
2564   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2565   *newmat = *dummy;
2566   PetscCall(PetscFree(dummy));
2567   PetscFunctionReturn(PETSC_SUCCESS);
2568 }
2569 
2570 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2571 {
2572   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2573 
2574   PetscFunctionBegin;
2575   PetscCall(MatInvertBlockDiagonal(a->A, values));
2576   A->factorerrortype = a->A->factorerrortype;
2577   PetscFunctionReturn(PETSC_SUCCESS);
2578 }
2579 
2580 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2581 {
2582   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2583 
2584   PetscFunctionBegin;
2585   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2586   PetscCall(MatSetRandom(aij->A, rctx));
2587   if (x->assembled) {
2588     PetscCall(MatSetRandom(aij->B, rctx));
2589   } else {
2590     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2591   }
2592   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2593   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2594   PetscFunctionReturn(PETSC_SUCCESS);
2595 }
2596 
2597 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2598 {
2599   PetscFunctionBegin;
2600   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2601   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2602   PetscFunctionReturn(PETSC_SUCCESS);
2603 }
2604 
2605 /*@
2606   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2607 
2608   Not Collective
2609 
2610   Input Parameter:
2611 . A - the matrix
2612 
2613   Output Parameter:
2614 . nz - the number of nonzeros
2615 
2616   Level: advanced
2617 
2618 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2619 @*/
2620 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2621 {
2622   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2623   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2624   PetscBool   isaij;
2625 
2626   PetscFunctionBegin;
2627   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2628   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2629   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2630   PetscFunctionReturn(PETSC_SUCCESS);
2631 }
2632 
2633 /*@
2634   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2635 
2636   Collective
2637 
2638   Input Parameters:
2639 + A  - the matrix
2640 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2641 
2642   Level: advanced
2643 
2644 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2645 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation when composed on A; a no-op for other matrix types */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2652 
2653 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2654 {
2655   PetscBool sc = PETSC_FALSE, flg;
2656 
2657   PetscFunctionBegin;
2658   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2659   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2660   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2661   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2662   PetscOptionsHeadEnd();
2663   PetscFunctionReturn(PETSC_SUCCESS);
2664 }
2665 
/* Y += a*I. Makes sure the diagonal block has room for one entry per row before shifting. */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    /* Preallocation would clobber the user's nonew setting; save and restore it */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2682 
2683 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2684 {
2685   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2686 
2687   PetscFunctionBegin;
2688   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2689   PetscCall(MatMissingDiagonal(a->A, missing, d));
2690   if (d) {
2691     PetscInt rstart;
2692     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2693     *d += rstart;
2694   }
2695   PetscFunctionReturn(PETSC_SUCCESS);
2696 }
2697 
2698 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2699 {
2700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2701 
2702   PetscFunctionBegin;
2703   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2704   PetscFunctionReturn(PETSC_SUCCESS);
2705 }
2706 
2707 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2708 {
2709   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2710 
2711   PetscFunctionBegin;
2712   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2713   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2714   PetscFunctionReturn(PETSC_SUCCESS);
2715 }
2716 
2717 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2718                                        MatGetRow_MPIAIJ,
2719                                        MatRestoreRow_MPIAIJ,
2720                                        MatMult_MPIAIJ,
2721                                        /* 4*/ MatMultAdd_MPIAIJ,
2722                                        MatMultTranspose_MPIAIJ,
2723                                        MatMultTransposeAdd_MPIAIJ,
2724                                        NULL,
2725                                        NULL,
2726                                        NULL,
2727                                        /*10*/ NULL,
2728                                        NULL,
2729                                        NULL,
2730                                        MatSOR_MPIAIJ,
2731                                        MatTranspose_MPIAIJ,
2732                                        /*15*/ MatGetInfo_MPIAIJ,
2733                                        MatEqual_MPIAIJ,
2734                                        MatGetDiagonal_MPIAIJ,
2735                                        MatDiagonalScale_MPIAIJ,
2736                                        MatNorm_MPIAIJ,
2737                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2738                                        MatAssemblyEnd_MPIAIJ,
2739                                        MatSetOption_MPIAIJ,
2740                                        MatZeroEntries_MPIAIJ,
2741                                        /*24*/ MatZeroRows_MPIAIJ,
2742                                        NULL,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        /*29*/ MatSetUp_MPI_Hash,
2747                                        NULL,
2748                                        NULL,
2749                                        MatGetDiagonalBlock_MPIAIJ,
2750                                        NULL,
2751                                        /*34*/ MatDuplicate_MPIAIJ,
2752                                        NULL,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                        /*39*/ MatAXPY_MPIAIJ,
2757                                        MatCreateSubMatrices_MPIAIJ,
2758                                        MatIncreaseOverlap_MPIAIJ,
2759                                        MatGetValues_MPIAIJ,
2760                                        MatCopy_MPIAIJ,
2761                                        /*44*/ MatGetRowMax_MPIAIJ,
2762                                        MatScale_MPIAIJ,
2763                                        MatShift_MPIAIJ,
2764                                        MatDiagonalSet_MPIAIJ,
2765                                        MatZeroRowsColumns_MPIAIJ,
2766                                        /*49*/ MatSetRandom_MPIAIJ,
2767                                        MatGetRowIJ_MPIAIJ,
2768                                        MatRestoreRowIJ_MPIAIJ,
2769                                        NULL,
2770                                        NULL,
2771                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2772                                        NULL,
2773                                        MatSetUnfactored_MPIAIJ,
2774                                        MatPermute_MPIAIJ,
2775                                        NULL,
2776                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2777                                        MatDestroy_MPIAIJ,
2778                                        MatView_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        /*64*/ NULL,
2782                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2783                                        NULL,
2784                                        NULL,
2785                                        NULL,
2786                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2787                                        MatGetRowMinAbs_MPIAIJ,
2788                                        NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        NULL,
2792                                        /*75*/ MatFDColoringApply_AIJ,
2793                                        MatSetFromOptions_MPIAIJ,
2794                                        NULL,
2795                                        NULL,
2796                                        MatFindZeroDiagonals_MPIAIJ,
2797                                        /*80*/ NULL,
2798                                        NULL,
2799                                        NULL,
2800                                        /*83*/ MatLoad_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        /*89*/ NULL,
2807                                        NULL,
2808                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2809                                        NULL,
2810                                        NULL,
2811                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2812                                        NULL,
2813                                        NULL,
2814                                        NULL,
2815                                        MatBindToCPU_MPIAIJ,
2816                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2817                                        NULL,
2818                                        NULL,
2819                                        MatConjugate_MPIAIJ,
2820                                        NULL,
2821                                        /*104*/ MatSetValuesRow_MPIAIJ,
2822                                        MatRealPart_MPIAIJ,
2823                                        MatImaginaryPart_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        /*109*/ NULL,
2827                                        NULL,
2828                                        MatGetRowMin_MPIAIJ,
2829                                        NULL,
2830                                        MatMissingDiagonal_MPIAIJ,
2831                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2832                                        NULL,
2833                                        MatGetGhosts_MPIAIJ,
2834                                        NULL,
2835                                        NULL,
2836                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2837                                        NULL,
2838                                        NULL,
2839                                        NULL,
2840                                        MatGetMultiProcBlock_MPIAIJ,
2841                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2842                                        MatGetColumnReductions_MPIAIJ,
2843                                        MatInvertBlockDiagonal_MPIAIJ,
2844                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2845                                        MatCreateSubMatricesMPI_MPIAIJ,
2846                                        /*129*/ NULL,
2847                                        NULL,
2848                                        NULL,
2849                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2850                                        NULL,
2851                                        /*134*/ NULL,
2852                                        NULL,
2853                                        NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2857                                        NULL,
2858                                        NULL,
2859                                        MatFDColoringSetUp_MPIXAIJ,
2860                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2861                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2862                                        /*145*/ NULL,
2863                                        NULL,
2864                                        NULL,
2865                                        MatCreateGraph_Simple_AIJ,
2866                                        NULL,
2867                                        /*150*/ NULL,
2868                                        MatEliminateZeros_MPIAIJ,
2869                                        MatGetRowSumAbs_MPIAIJ,
2870                                        NULL};
2871 
2872 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2873 {
2874   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2875 
2876   PetscFunctionBegin;
2877   PetscCall(MatStoreValues(aij->A));
2878   PetscCall(MatStoreValues(aij->B));
2879   PetscFunctionReturn(PETSC_SUCCESS);
2880 }
2881 
2882 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2883 {
2884   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2885 
2886   PetscFunctionBegin;
2887   PetscCall(MatRetrieveValues(aij->A));
2888   PetscCall(MatRetrieveValues(aij->B));
2889   PetscFunctionReturn(PETSC_SUCCESS);
2890 }
2891 
2892 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2893 {
2894   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2895   PetscMPIInt size;
2896 
2897   PetscFunctionBegin;
2898   if (B->hash_active) {
2899     B->ops[0]      = b->cops;
2900     B->hash_active = PETSC_FALSE;
2901   }
2902   PetscCall(PetscLayoutSetUp(B->rmap));
2903   PetscCall(PetscLayoutSetUp(B->cmap));
2904 
2905 #if defined(PETSC_USE_CTABLE)
2906   PetscCall(PetscHMapIDestroy(&b->colmap));
2907 #else
2908   PetscCall(PetscFree(b->colmap));
2909 #endif
2910   PetscCall(PetscFree(b->garray));
2911   PetscCall(VecDestroy(&b->lvec));
2912   PetscCall(VecScatterDestroy(&b->Mvctx));
2913 
2914   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2915 
2916   MatSeqXAIJGetOptions_Private(b->B);
2917   PetscCall(MatDestroy(&b->B));
2918   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2919   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2920   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2921   PetscCall(MatSetType(b->B, MATSEQAIJ));
2922   MatSeqXAIJRestoreOptions_Private(b->B);
2923 
2924   MatSeqXAIJGetOptions_Private(b->A);
2925   PetscCall(MatDestroy(&b->A));
2926   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2927   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2928   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2929   PetscCall(MatSetType(b->A, MATSEQAIJ));
2930   MatSeqXAIJRestoreOptions_Private(b->A);
2931 
2932   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2933   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2934   B->preallocated  = PETSC_TRUE;
2935   B->was_assembled = PETSC_FALSE;
2936   B->assembled     = PETSC_FALSE;
2937   PetscFunctionReturn(PETSC_SUCCESS);
2938 }
2939 
2940 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2941 {
2942   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2943 
2944   PetscFunctionBegin;
2945   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2946   PetscCall(PetscLayoutSetUp(B->rmap));
2947   PetscCall(PetscLayoutSetUp(B->cmap));
2948 
2949 #if defined(PETSC_USE_CTABLE)
2950   PetscCall(PetscHMapIDestroy(&b->colmap));
2951 #else
2952   PetscCall(PetscFree(b->colmap));
2953 #endif
2954   PetscCall(PetscFree(b->garray));
2955   PetscCall(VecDestroy(&b->lvec));
2956   PetscCall(VecScatterDestroy(&b->Mvctx));
2957 
2958   PetscCall(MatResetPreallocation(b->A));
2959   PetscCall(MatResetPreallocation(b->B));
2960   B->preallocated  = PETSC_TRUE;
2961   B->was_assembled = PETSC_FALSE;
2962   B->assembled     = PETSC_FALSE;
2963   PetscFunctionReturn(PETSC_SUCCESS);
2964 }
2965 
/* Duplicate an MPIAIJ matrix: create a new matrix of the same type/layout and copy
   (or not, per cpvalues) the numerical data of the diagonal and off-diagonal blocks,
   along with the cached parallel communication structures. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  /* same dynamic type as the input (could be a subtype of MPIAIJ) */
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() work arrays are not copied; they are rebuilt lazily on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share the layout objects by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* hash-based (unpreallocated) assembly has no block data to copy; just set up */
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      /* dense global-to-local column map of length N */
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      /* garray has one entry per column of the off-diagonal block B */
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) {
      /* the scatter context is shared by reference, not duplicated */
      a->Mvctx = oldmat->Mvctx;
      PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
    }
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  /* carry over composed functions (e.g. MatStoreValues_C) registered on the input */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3027 
3028 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3029 {
3030   PetscBool isbinary, ishdf5;
3031 
3032   PetscFunctionBegin;
3033   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3034   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3035   /* force binary viewer to load .info file if it has not yet done so */
3036   PetscCall(PetscViewerSetUp(viewer));
3037   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3038   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3039   if (isbinary) {
3040     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3041   } else if (ishdf5) {
3042 #if defined(PETSC_HAVE_HDF5)
3043     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3044 #else
3045     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3046 #endif
3047   } else {
3048     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3049   }
3050   PetscFunctionReturn(PETSC_SUCCESS);
3051 }
3052 
/* Load an MPIAIJ matrix from a PETSc binary viewer.  The on-disk format is
   header (classid, M, N, nz), per-row nonzero counts, then all column indices,
   then all values; each rank reads only its slice via PetscViewerBinaryReadAll(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* negative nz marks a special (e.g. dense/factored) storage format this loader cannot read */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the per-row counts into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* nz == PETSC_INT_MAX is a sentinel; presumably the total count was not recorded, so
     skip the consistency check in that case */
  if (nz != PETSC_INT_MAX) {
    PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3105 
3106 /* Not scalable because of ISAllGather() unless getting all columns. */
3107 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3108 {
3109   IS          iscol_local;
3110   PetscBool   isstride;
3111   PetscMPIInt lisstride = 0, gisstride;
3112 
3113   PetscFunctionBegin;
3114   /* check if we are grabbing all columns*/
3115   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3116 
3117   if (isstride) {
3118     PetscInt start, len, mstart, mlen;
3119     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3120     PetscCall(ISGetLocalSize(iscol, &len));
3121     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3122     if (mstart == start && mlen - mstart == len) lisstride = 1;
3123   }
3124 
3125   PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3126   if (gisstride) {
3127     PetscInt N;
3128     PetscCall(MatGetSize(mat, NULL, &N));
3129     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3130     PetscCall(ISSetIdentity(iscol_local));
3131     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3132   } else {
3133     PetscInt cbs;
3134     PetscCall(ISGetBlockSize(iscol, &cbs));
3135     PetscCall(ISAllGather(iscol, &iscol_local));
3136     PetscCall(ISSetBlockSize(iscol_local, cbs));
3137   }
3138 
3139   *isseq = iscol_local;
3140   PetscFunctionReturn(PETSC_SUCCESS);
3141 }
3142 
3143 /*
3144  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3145  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3146 
3147  Input Parameters:
3148 +   mat - matrix
+   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
           i.e., mat->rstart <= isrow[i] < mat->rend
3151 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3152            i.e., mat->cstart <= iscol[i] < mat->cend
3153 
3154  Output Parameters:
3155 +   isrow_d - sequential row index set for retrieving mat->A
3156 .   iscol_d - sequential  column index set for retrieving mat->A
3157 .   iscol_o - sequential column index set for retrieving mat->B
3158 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3159  */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  /* -1 marks columns NOT selected by iscol; selected entries are overwritten below */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  /* exclusive prefix sum: isstart = number of iscol entries on lower-ranked processes */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  /* idx ownership transfers to the IS (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  /* isrow indices are local rows (mat->rstart <= isrow[i] < mat->rend); shift to local numbering */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    /* entry > -1 means this ghost column of B was selected by some rank's iscol */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* caller takes ownership of cmap1 and must PetscFree() it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3256 
3257 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    /* these index sets were composed onto the submatrix by the MAT_INITIAL_MATRIX call below */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    /* Asub/Bsub ownership transfers to M; Bsub is destroyed by the call */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* walk the two sorted global-column lists in step, keeping only iscol_o entries
         whose global column survived the condensation */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    /* composing takes a reference, so the local references can be dropped immediately */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3349 
/* Extract a parallel submatrix mat[isrow, iscol].  Dispatches to specialized
   implementations when isrow/iscol match mat's row/column ownership, otherwise
   falls back to the non-scalable path that gathers iscol onto every process. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* which composed object is present on *newmat tells us which path created it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* min/max inside the local ownership range implies every index is local */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* all ranks must agree (logical AND) so that every rank takes the same branch */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered IS on the submatrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3449 
3450 /*@C
3451   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3452   and "off-diagonal" part of the matrix in CSR format.
3453 
3454   Collective
3455 
3456   Input Parameters:
3457 + comm   - MPI communicator
3458 . A      - "diagonal" portion of matrix
3459 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3460 - garray - global index of `B` columns
3461 
3462   Output Parameter:
3463 . mat - the matrix, with input `A` as its local diagonal matrix
3464 
3465   Level: advanced
3466 
3467   Notes:
3468   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3469 
3470   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3471 
3472 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3473 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's compact column indices to global column numbers, in place */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat */
  /* Bnew wraps B's (now globally indexed) i/j/a arrays without copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer ownership of the i/j/a arrays from B to Bnew: B must not free them when
     destroyed below, Bnew frees them when *mat is destroyed */
  b->free_a  = PETSC_FALSE;
  b->free_ij = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->free_a  = PETSC_TRUE;
  bnew->free_ij = PETSC_TRUE;

  /* condense columns of maij->B */
  /* assembly with MAT_NO_OFF_PROC_ENTRIES set triggers the local compaction only */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3542 
3543 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3544 
/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - Extracts the parallel submatrix mat[isrow,iscol] when the
  submatrix keeps the same row distribution as mat on each process.

  Strategy: build a compressed sequential column IS (iscol_sub) containing only the requested columns
  this process actually stores (diagonal block plus columns present in garray), extract a sequential
  submatrix Msub with MatCreateSubMatrices_MPIAIJ_SingleIS_Local(), then scatter Msub's entries into
  the parallel result with MatSetValues_MPIAIJ(). For MAT_INITIAL_MATRIX the work objects
  ("SubMatrix", "SubIScol", "Subcmap", optionally "ISAllGather") are composed on *newmat so a later
  MAT_REUSE_MATRIX call can skip the symbolic phase.

  iscol_local is a sequential IS of all requested column indices; the merging scan below requires it
  to be sorted (duplicates are allowed).
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the work objects saved on *newmat by a previous MAT_INITIAL_MATRIX call; only the numerical extraction is redone */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* allcolumns must hold on every rank to take the fast path, hence the logical AND reduction */
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      /* identity column map: submatrix column j is global column j */
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* merge the sorted request list with the (sorted) off-diagonal column map garray,
         keeping only columns this process stores; idx[] gets the global column,
         cmap1[] the corresponding column index in the submatrix */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread Ncols columns as evenly as possible over the ranks */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of the local column counts yields this rank's diagonal column range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* cmap translates Msub's local column back to the submatrix's global column */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* remap Msub's compressed column indices to the submatrix's global columns */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub)); /* composing took a reference; drop ours */

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3754 
3755 /*
3756     Not great since it makes two copies of the submatrix, first an SeqAIJ
3757   in local and then by concatenating the local matrices the end result.
3758   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3759 
3760   This requires a sequential iscol with all indices.
3761 */
/*
  MatCreateSubMatrix_MPIAIJ_nonscalable - Extracts the parallel submatrix mat[isrow,iscol] by first
  building a full sequential copy of the requested rows/columns on each process (Mreuse), then
  redistributing its entries into a new parallel matrix. Nonscalable because every process holds
  its complete sequential submatrix. iscol must be a sequential IS containing all column indices.
  csize gives the local column size of the result, or PETSC_DECIDE.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the fast path is taken only if every rank requested all columns */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential work matrix was composed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the n columns as evenly as possible over the ranks */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's diagonal column range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* walk Mreuse's CSR arrays row by row and insert each row into the parallel matrix */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz); /* safe even when jj is NULL (empty matrix) */
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse)); /* composing took a reference; drop ours */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3888 
/*
  MatMPIAIJSetPreallocationCSR_MPIAIJ - Preallocates and fills an MPIAIJ matrix from local CSR data.

  Ii[] holds row starts for the m local rows (Ii[0], saved as irstart, is subtracted throughout, so
  Ii need not start at zero); J[] holds global column indices and v[] optional values. The data are
  copied into the matrix. Also records in Aij->ld the number of entries per row that fall before the
  diagonal block, which MatUpdateMPIAIJWithArrays() later relies on.
*/
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m       = B->rmap->n;
  cstart  = B->cmap->rstart;
  cend    = B->cmap->rend;
  rstart  = B->rmap->rstart;
  irstart = Ii[0]; /* offset of the first local row start; subtracted so Ii may carry a global offset */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* sanity checks (debug builds only): nonnegative row lengths, and the first/last
       column index of each row within [0, N) — full coverage only if rows are sorted */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count, per row, the entries landing in the diagonal block (column in [cstart,cend)) vs off-diagonal */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* insert the values row by row; the CSR data are local so off-process communication can be skipped below */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
  }
  /* temporarily assert no off-process entries so assembly avoids the stash communication */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j; /* assumes columns within a row are sorted — TODO confirm against callers */
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3957 
3958 /*@
3959   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3960   (the default parallel PETSc format).
3961 
3962   Collective
3963 
3964   Input Parameters:
3965 + B - the matrix
3966 . i - the indices into `j` for the start of each local row (indices start with zero)
3967 . j - the column indices for each local row (indices start with zero)
3968 - v - optional values in the matrix
3969 
3970   Level: developer
3971 
3972   Notes:
3973   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3974   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3975   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3976 
3977   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3978 
3979   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3980 
3981   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3982 
3983   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3984   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3985 
3986   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
3988   as shown
3989 .vb
3990         1 0 0
3991         2 0 3     P0
3992        -------
3993         4 5 6     P1
3994 
3995      Process0 [P0] rows_owned=[0,1]
3996         i =  {0,1,3}  [size = nrow+1  = 2+1]
3997         j =  {0,0,2}  [size = 3]
3998         v =  {1,2,3}  [size = 3]
3999 
4000      Process1 [P1] rows_owned=[2]
4001         i =  {0,3}    [size = nrow+1  = 1+1]
4002         j =  {0,1,2}  [size = 3]
4003         v =  {4,5,6}  [size = 3]
4004 .ve
4005 
4006 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4007           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4008 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the implementation composed on B (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod is a no-op when the matrix type does not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4015 
4016 /*@
4017   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4018   (the default parallel PETSc format).  For good matrix assembly performance
4019   the user should preallocate the matrix storage by setting the parameters
4020   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4021 
4022   Collective
4023 
4024   Input Parameters:
4025 + B     - the matrix
4026 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4027            (same value is used for all local rows)
4028 . d_nnz - array containing the number of nonzeros in the various rows of the
4029            DIAGONAL portion of the local submatrix (possibly different for each row)
4030            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4031            The size of this array is equal to the number of local rows, i.e 'm'.
4032            For matrices that will be factored, you must leave room for (and set)
4033            the diagonal entry even if it is zero.
4034 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4035            submatrix (same value is used for all local rows).
4036 - o_nnz - array containing the number of nonzeros in the various rows of the
4037            OFF-DIAGONAL portion of the local submatrix (possibly different for
4038            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4039            structure. The size of this array is equal to the number
4040            of local rows, i.e 'm'.
4041 
4042   Example Usage:
4043   Consider the following 8x8 matrix with 34 non-zero values, that is
4044   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4045   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4046   as follows
4047 
4048 .vb
4049             1  2  0  |  0  3  0  |  0  4
4050     Proc0   0  5  6  |  7  0  0  |  8  0
4051             9  0 10  | 11  0  0  | 12  0
4052     -------------------------------------
4053            13  0 14  | 15 16 17  |  0  0
4054     Proc1   0 18  0  | 19 20 21  |  0  0
4055             0  0  0  | 22 23  0  | 24  0
4056     -------------------------------------
4057     Proc2  25 26 27  |  0  0 28  | 29  0
4058            30  0  0  | 31 32 33  |  0 34
4059 .ve
4060 
4061   This can be represented as a collection of submatrices as
4062 .vb
4063       A B C
4064       D E F
4065       G H I
4066 .ve
4067 
4068   Where the submatrices A,B,C are owned by proc0, D,E,F are
4069   owned by proc1, G,H,I are owned by proc2.
4070 
4071   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4072   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4073   The 'M','N' parameters are 8,8, and have the same values on all procs.
4074 
4075   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4076   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4077   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4078   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4079   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4081 
4082   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4083   allocated for every row of the local diagonal submatrix, and `o_nz`
4084   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4086   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4087   In this case, the values of `d_nz`, `o_nz` are
4088 .vb
4089      proc0  dnz = 2, o_nz = 2
4090      proc1  dnz = 3, o_nz = 2
4091      proc2  dnz = 1, o_nz = 4
4092 .ve
4093   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4094   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
4096   34 values.
4097 
4098   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4099   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4100   In the above case the values for `d_nnz`, `o_nnz` are
4101 .vb
4102      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4103      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4104      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4105 .ve
4106   Here the space allocated is sum of all the above values i.e 34, and
4107   hence pre-allocation is perfect.
4108 
4109   Level: intermediate
4110 
4111   Notes:
4112   If the *_nnz parameter is given then the *_nz parameter is ignored
4113 
4114   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4115   storage.  The stored row and column indices begin with zero.
4116   See [Sparse Matrices](sec_matsparse) for details.
4117 
4118   The parallel matrix is partitioned such that the first m0 rows belong to
4119   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4120   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4121 
4122   The DIAGONAL portion of the local submatrix of a processor can be defined
4123   as the submatrix which is obtained by extraction the part corresponding to
4124   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4125   first row that belongs to the processor, r2 is the last row belonging to
4126   the this processor, and c1-c2 is range of indices of the local part of a
4127   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4128   common case of a square matrix, the row and column ranges are the same and
4129   the DIAGONAL part is also square. The remaining portion of the local
4130   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4131 
4132   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4133 
4134   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4135   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4136   You can also run with the option `-info` and look for messages with the string
4137   malloc in them to see if additional memory allocation was needed.
4138 
4139 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4140           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4141 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate the Mat object before dispatching; PetscTryMethod is a no-op
     when the matrix type does not compose "MatMPIAIJSetPreallocation_C" */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4150 
4151 /*@
4152   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4153   CSR format for the local rows.
4154 
4155   Collective
4156 
4157   Input Parameters:
4158 + comm - MPI communicator
4159 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4160 . n    - This value should be the same as the local size used in creating the
4161          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4162          calculated if `N` is given) For square matrices n is almost always `m`.
4163 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4164 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4165 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4166 . j    - global column indices
4167 - a    - optional matrix values
4168 
4169   Output Parameter:
4170 . mat - the matrix
4171 
4172   Level: intermediate
4173 
4174   Notes:
4175   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4176   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4177   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4178 
4179   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4180 
4181   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4182 
4183   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4184   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4185 
4186   The format which is used for the sparse matrix input, is equivalent to a
4187   row-major ordering, i.e., for the following matrix, the input data expected is
4188   as shown
4189 .vb
4190         1 0 0
4191         2 0 3     P0
4192        -------
4193         4 5 6     P1
4194 
4195      Process0 [P0] rows_owned=[0,1]
4196         i =  {0,1,3}  [size = nrow+1  = 2+1]
4197         j =  {0,0,2}  [size = 3]
4198         v =  {1,2,3}  [size = 3]
4199 
4200      Process1 [P1] rows_owned=[2]
4201         i =  {0,3}    [size = nrow+1  = 1+1]
4202         j =  {0,1,2}  [size = 3]
4203         v =  {4,5,6}  [size = 3]
4204 .ve
4205 
4206 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4207           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4208 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* row offsets must be zero-based: i[0] == 0 (i may be NULL only when there are no rows) */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies the CSR arrays into the matrix, preallocates, and assembles it */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4221 
4222 /*@
4223   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4224   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4225   from `MatCreateMPIAIJWithArrays()`
4226 
4227   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4228 
4229   Collective
4230 
4231   Input Parameters:
4232 + mat - the matrix
4233 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4234 . n   - This value should be the same as the local size used in creating the
4235        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4236        calculated if N is given) For square matrices n is almost always m.
4237 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4238 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4239 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4240 . J   - column indices
4241 - v   - matrix values
4242 
4243   Level: deprecated
4244 
4245 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4246           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4247 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao; /* value arrays of the diagonal (A) and off-diagonal (B) local blocks */
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;   /* row offsets (CSR i-array) of the diagonal block */
  PetscInt       *ld  = Aij->ld; /* ld[i]: number of entries of row i that lie left of the diagonal block (see copies below) */

  PetscFunctionBegin;
  /* Only v supplies new data; the CSR structure (m, n, Ii, J) must match what the matrix was created with */
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      /* the three-way split below relies on each row's column indices being strictly increasing */
      for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
        PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
        PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
      }
    }
    nnz = Ii[i + 1] - Ii[i];   /* total entries of row i in the input CSR */
    Iii = Ii[i];               /* offset of row i within v */
    ldi = ld[i];               /* entries left of the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries inside the diagonal block */
    /* with sorted columns, row i of v is laid out as [left off-diag | diagonal block | right off-diag] */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* values were written directly into the local blocks, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  /* bump object states so cached data (norms, duplicates, GPU copies) is invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4297 
4298 /*@
4299   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4300 
4301   Collective
4302 
4303   Input Parameters:
4304 + mat - the matrix
4305 - v   - matrix values, stored by row
4306 
4307   Level: intermediate
4308 
4309   Notes:
4310   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4311 
4312   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4313 
4314 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4315           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4316 @*/
4317 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4318 {
4319   PetscInt        nnz, i, m;
4320   PetscBool       nooffprocentries;
4321   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4322   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4323   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4324   PetscScalar    *ad, *ao;
4325   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4326   PetscInt        ldi, Iii, md;
4327   PetscInt       *ld = Aij->ld;
4328 
4329   PetscFunctionBegin;
4330   m = mat->rmap->n;
4331 
4332   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4333   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4334   Iii = 0;
4335   for (i = 0; i < m; i++) {
4336     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4337     ldi = ld[i];
4338     md  = Adi[i + 1] - Adi[i];
4339     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4340     ad += md;
4341     if (ao) {
4342       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4343       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4344       ao += nnz - md;
4345     }
4346     Iii += nnz;
4347   }
4348   nooffprocentries      = mat->nooffprocentries;
4349   mat->nooffprocentries = PETSC_TRUE;
4350   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4351   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4352   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4353   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4354   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4355   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4356   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4357   mat->nooffprocentries = nooffprocentries;
4358   PetscFunctionReturn(PETSC_SUCCESS);
4359 }
4360 
4361 /*@
4362   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4363   (the default parallel PETSc format).  For good matrix assembly performance
4364   the user should preallocate the matrix storage by setting the parameters
4365   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4366 
4367   Collective
4368 
4369   Input Parameters:
4370 + comm  - MPI communicator
4371 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4372           This value should be the same as the local size used in creating the
4373           y vector for the matrix-vector product y = Ax.
4374 . n     - This value should be the same as the local size used in creating the
4375           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4376           calculated if N is given) For square matrices n is almost always m.
4377 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4378 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4379 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4380           (same value is used for all local rows)
4381 . d_nnz - array containing the number of nonzeros in the various rows of the
4382           DIAGONAL portion of the local submatrix (possibly different for each row)
4383           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4384           The size of this array is equal to the number of local rows, i.e 'm'.
4385 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4386           submatrix (same value is used for all local rows).
4387 - o_nnz - array containing the number of nonzeros in the various rows of the
4388           OFF-DIAGONAL portion of the local submatrix (possibly different for
4389           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4390           structure. The size of this array is equal to the number
4391           of local rows, i.e 'm'.
4392 
4393   Output Parameter:
4394 . A - the matrix
4395 
4396   Options Database Keys:
4397 + -mat_no_inode                     - Do not use inodes
4398 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4399 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4400                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4401                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4402 
4403   Level: intermediate
4404 
4405   Notes:
4406   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4407   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4408   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4409 
4410   If the *_nnz parameter is given then the *_nz parameter is ignored
4411 
4412   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4413   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4414   storage requirements for this matrix.
4415 
4416   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4417   processor than it must be used on all processors that share the object for
4418   that argument.
4419 
4420   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4421   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4422 
4423   The user MUST specify either the local or global matrix dimensions
4424   (possibly both).
4425 
4426   The parallel matrix is partitioned across processors such that the
4427   first `m0` rows belong to process 0, the next `m1` rows belong to
4428   process 1, the next `m2` rows belong to process 2, etc., where
4429   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4430   values corresponding to [m x N] submatrix.
4431 
4432   The columns are logically partitioned with the n0 columns belonging
4433   to 0th partition, the next n1 columns belonging to the next
4434   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4435 
4436   The DIAGONAL portion of the local submatrix on any given processor
4437   is the submatrix corresponding to the rows and columns m,n
4438   corresponding to the given processor. i.e diagonal matrix on
4439   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4440   etc. The remaining portion of the local submatrix [m x (N-n)]
4441   constitute the OFF-DIAGONAL portion. The example below better
4442   illustrates this concept.
4443 
4444   For a square global matrix we define each processor's diagonal portion
4445   to be its local rows and the corresponding columns (a square submatrix);
4446   each processor's off-diagonal portion encompasses the remainder of the
4447   local matrix (a rectangular submatrix).
4448 
4449   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4450 
4451   When calling this routine with a single process communicator, a matrix of
4452   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4453   type of communicator, use the construction mechanism
4454 .vb
4455   MatCreate(..., &A);
4456   MatSetType(A, MATMPIAIJ);
4457   MatSetSizes(A, m, n, M, N);
4458   MatMPIAIJSetPreallocation(A, ...);
4459 .ve
4460 
4461   By default, this format uses inodes (identical nodes) when possible.
4462   We search for consecutive rows with the same nonzero structure, thereby
4463   reusing matrix information to achieve increased efficiency.
4464 
4465   Example Usage:
4466   Consider the following 8x8 matrix with 34 non-zero values, that is
4467   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4468   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4469   as follows
4470 
4471 .vb
4472             1  2  0  |  0  3  0  |  0  4
4473     Proc0   0  5  6  |  7  0  0  |  8  0
4474             9  0 10  | 11  0  0  | 12  0
4475     -------------------------------------
4476            13  0 14  | 15 16 17  |  0  0
4477     Proc1   0 18  0  | 19 20 21  |  0  0
4478             0  0  0  | 22 23  0  | 24  0
4479     -------------------------------------
4480     Proc2  25 26 27  |  0  0 28  | 29  0
4481            30  0  0  | 31 32 33  |  0 34
4482 .ve
4483 
4484   This can be represented as a collection of submatrices as
4485 
4486 .vb
4487       A B C
4488       D E F
4489       G H I
4490 .ve
4491 
4492   Where the submatrices A,B,C are owned by proc0, D,E,F are
4493   owned by proc1, G,H,I are owned by proc2.
4494 
4495   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4496   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4497   The 'M','N' parameters are 8,8, and have the same values on all procs.
4498 
4499   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4500   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4501   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4502   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4503   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4504   matrix, and [DF] as another SeqAIJ matrix.
4505 
4506   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4507   allocated for every row of the local diagonal submatrix, and `o_nz`
4508   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4509   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4510   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4511   In this case, the values of `d_nz`,`o_nz` are
4512 .vb
4513      proc0  dnz = 2, o_nz = 2
4514      proc1  dnz = 3, o_nz = 2
4515      proc2  dnz = 1, o_nz = 4
4516 .ve
4517   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4518   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4519   for proc2. i.e we are using 12+15+10=37 storage locations to store
4520   34 values.
4521 
4522   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4523   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4524   In the above case the values for d_nnz,o_nnz are
4525 .vb
4526      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4527      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4528      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4529 .ve
4530   Here the space allocated is sum of all the above values i.e 34, and
4531   hence pre-allocation is perfect.
4532 
4533 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4534           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4535           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4536 @*/
4537 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4538 {
4539   PetscMPIInt size;
4540 
4541   PetscFunctionBegin;
4542   PetscCall(MatCreate(comm, A));
4543   PetscCall(MatSetSizes(*A, m, n, M, N));
4544   PetscCallMPI(MPI_Comm_size(comm, &size));
4545   if (size > 1) {
4546     PetscCall(MatSetType(*A, MATMPIAIJ));
4547     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4548   } else {
4549     PetscCall(MatSetType(*A, MATSEQAIJ));
4550     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4551   }
4552   PetscFunctionReturn(PETSC_SUCCESS);
4553 }
4554 
4555 /*MC
4556     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4557 
4558     Synopsis:
4559     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4560 
4561     Not Collective
4562 
4563     Input Parameter:
4564 .   A - the `MATMPIAIJ` matrix
4565 
4566     Output Parameters:
4567 +   Ad - the diagonal portion of the matrix
4568 .   Ao - the off-diagonal portion of the matrix
4569 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4570 -   ierr - error code
4571 
4572      Level: advanced
4573 
4574     Note:
4575     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4576 
4577 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4578 M*/
4579 
4580 /*MC
4581     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4582 
4583     Synopsis:
4584     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4585 
4586     Not Collective
4587 
4588     Input Parameters:
4589 +   A - the `MATMPIAIJ` matrix
4590 .   Ad - the diagonal portion of the matrix
4591 .   Ao - the off-diagonal portion of the matrix
4592 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4593 -   ierr - error code
4594 
4595      Level: advanced
4596 
4597 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4598 M*/
4599 
4600 /*@C
4601   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4602 
4603   Not Collective
4604 
4605   Input Parameter:
4606 . A - The `MATMPIAIJ` matrix
4607 
4608   Output Parameters:
4609 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4610 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4611 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4612 
4613   Level: intermediate
4614 
4615   Note:
4616   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4617   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4618   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4619   local column numbers to global column numbers in the original matrix.
4620 
4621   Fortran Notes:
4622   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4623 
4624 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4625 @*/
4626 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4627 {
4628   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4629   PetscBool   flg;
4630 
4631   PetscFunctionBegin;
4632   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4633   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4634   if (Ad) *Ad = a->A;
4635   if (Ao) *Ao = a->B;
4636   if (colmap) *colmap = a->garray;
4637   PetscFunctionReturn(PETSC_SUCCESS);
4638 }
4639 
/* Concatenate each process's sequential matrix (row-wise) into one parallel AIJ matrix on comm */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N)); /* inmat is sequential, so its global sizes are its local sizes */
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* exclusive prefix sum of local row counts gives this process's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row to preallocate exactly */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocations are set; whichever matches the actual type (seq or mpi) takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* every process inserts only its own rows, so assembly needs no communication */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart; /* shift local row index to the global numbering */
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4691 
/* Container destructor for the Mat_Merge_SeqsToMPI context attached by MatCreateMPIAIJSumSeqAIJSymbolic() */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS); /* nothing was attached */
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj point into one contiguous slab anchored at index 0 (allocated by PetscPostIrecvInt),
     so the slab must be freed before the pointer arrays themselves */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4714 
4715 #include <../src/mat/utils/freespace.h>
4716 #include <petscbt.h>
4717 
/* Numeric phase of merging per-process sequential AIJ matrices into mpimat: exchanges the
   numerical values whose communication pattern was set up by MatCreateMPIAIJSumSeqAIJSymbolic()
   and accumulates local plus received contributions row by row */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
  PetscMPIInt          proc, k;
  PetscInt           **buf_ri, **buf_rj; /* received i/j structures, one per incoming message */
  PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i; /* received values; accumulator for one output row */
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge context (row map, message lengths, received ij-structures) from the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* rows owned by proc are contiguous in seqmat, so their values form one contiguous send */
    i = owners[proc];
    PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i)); /* scratch row: at most N columns */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  /* set up one cursor per received message: current row number and current i-structure offset */
  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row number */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge: aj is a subset of bj_i and both are sorted, so scan bj_i once matching columns */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *nextai[k];
        aa     = abuf_r[k] + *nextai[k];
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        /* advance this message's cursor to its next row */
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] anchors the contiguous receive slab (see PetscPostIrecvScalar); free it before the pointer array */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4836 
4837 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4838 {
4839   Mat                  B_mpi;
4840   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4841   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4842   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4843   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4844   PetscInt             len, *dnz, *onz, bs, cbs;
4845   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4846   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4847   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4848   MPI_Status          *status;
4849   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4850   PetscBT              lnkbt;
4851   Mat_Merge_SeqsToMPI *merge;
4852   PetscContainer       container;
4853 
4854   PetscFunctionBegin;
4855   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4856 
4857   /* make sure it is a PETSc comm */
4858   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4859   PetscCallMPI(MPI_Comm_size(comm, &size));
4860   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4861 
4862   PetscCall(PetscNew(&merge));
4863   PetscCall(PetscMalloc1(size, &status));
4864 
4865   /* determine row ownership */
4866   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4867   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4868   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4869   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4870   PetscCall(PetscLayoutSetUp(merge->rowmap));
4871   PetscCall(PetscMalloc1(size, &len_si));
4872   PetscCall(PetscMalloc1(size, &merge->len_s));
4873 
4874   m      = merge->rowmap->n;
4875   owners = merge->rowmap->range;
4876 
4877   /* determine the number of messages to send, their lengths */
4878   len_s = merge->len_s;
4879 
4880   len          = 0; /* length of buf_si[] */
4881   merge->nsend = 0;
4882   for (PetscMPIInt proc = 0; proc < size; proc++) {
4883     len_si[proc] = 0;
4884     if (proc == rank) {
4885       len_s[proc] = 0;
4886     } else {
4887       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4888       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4889     }
4890     if (len_s[proc]) {
4891       merge->nsend++;
4892       nrows = 0;
4893       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4894         if (ai[i + 1] > ai[i]) nrows++;
4895       }
4896       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4897       len += len_si[proc];
4898     }
4899   }
4900 
4901   /* determine the number and length of messages to receive for ij-structure */
4902   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4903   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4904 
4905   /* post the Irecv of j-structure */
4906   PetscCall(PetscCommGetNewTag(comm, &tagj));
4907   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4908 
4909   /* post the Isend of j-structure */
4910   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4911 
4912   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4913     if (!len_s[proc]) continue;
4914     i = owners[proc];
4915     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4916     k++;
4917   }
4918 
4919   /* receives and sends of j-structure are complete */
4920   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4921   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4922 
4923   /* send and recv i-structure */
4924   PetscCall(PetscCommGetNewTag(comm, &tagi));
4925   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4926 
4927   PetscCall(PetscMalloc1(len + 1, &buf_s));
4928   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4929   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4930     if (!len_s[proc]) continue;
4931     /* form outgoing message for i-structure:
4932          buf_si[0]:                 nrows to be sent
4933                [1:nrows]:           row index (global)
4934                [nrows+1:2*nrows+1]: i-structure index
4935     */
4936     nrows       = len_si[proc] / 2 - 1;
4937     buf_si_i    = buf_si + nrows + 1;
4938     buf_si[0]   = nrows;
4939     buf_si_i[0] = 0;
4940     nrows       = 0;
4941     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4942       anzi = ai[i + 1] - ai[i];
4943       if (anzi) {
4944         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4945         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4946         nrows++;
4947       }
4948     }
4949     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4950     k++;
4951     buf_si += len_si[proc];
4952   }
4953 
4954   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4955   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4956 
4957   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4958   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4959 
4960   PetscCall(PetscFree(len_si));
4961   PetscCall(PetscFree(len_ri));
4962   PetscCall(PetscFree(rj_waits));
4963   PetscCall(PetscFree2(si_waits, sj_waits));
4964   PetscCall(PetscFree(ri_waits));
4965   PetscCall(PetscFree(buf_s));
4966   PetscCall(PetscFree(status));
4967 
4968   /* compute a local seq matrix in each processor */
4969   /* allocate bi array and free space for accumulating nonzero column info */
4970   PetscCall(PetscMalloc1(m + 1, &bi));
4971   bi[0] = 0;
4972 
4973   /* create and initialize a linked list */
4974   nlnk = N + 1;
4975   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4976 
4977   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4978   len = ai[owners[rank + 1]] - ai[owners[rank]];
4979   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4980 
4981   current_space = free_space;
4982 
4983   /* determine symbolic info for each local row */
4984   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4985 
4986   for (k = 0; k < merge->nrecv; k++) {
4987     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4988     nrows       = *buf_ri_k[k];
4989     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4990     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4991   }
4992 
4993   MatPreallocateBegin(comm, m, n, dnz, onz);
4994   len = 0;
4995   for (i = 0; i < m; i++) {
4996     bnzi = 0;
4997     /* add local non-zero cols of this proc's seqmat into lnk */
4998     arow = owners[rank] + i;
4999     anzi = ai[arow + 1] - ai[arow];
5000     aj   = a->j + ai[arow];
5001     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5002     bnzi += nlnk;
5003     /* add received col data into lnk */
5004     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5005       if (i == *nextrow[k]) {            /* i-th row */
5006         anzi = *(nextai[k] + 1) - *nextai[k];
5007         aj   = buf_rj[k] + *nextai[k];
5008         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5009         bnzi += nlnk;
5010         nextrow[k]++;
5011         nextai[k]++;
5012       }
5013     }
5014     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5015 
5016     /* if free space is not available, make more free space */
5017     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5018     /* copy data into free space, then initialize lnk */
5019     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5020     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5021 
5022     current_space->array += bnzi;
5023     current_space->local_used += bnzi;
5024     current_space->local_remaining -= bnzi;
5025 
5026     bi[i + 1] = bi[i] + bnzi;
5027   }
5028 
5029   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5030 
5031   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5032   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5033   PetscCall(PetscLLDestroy(lnk, lnkbt));
5034 
5035   /* create symbolic parallel matrix B_mpi */
5036   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5037   PetscCall(MatCreate(comm, &B_mpi));
5038   if (n == PETSC_DECIDE) {
5039     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5040   } else {
5041     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5042   }
5043   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5044   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5045   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5046   MatPreallocateEnd(dnz, onz);
5047   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5048 
5049   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5050   B_mpi->assembled = PETSC_FALSE;
5051   merge->bi        = bi;
5052   merge->bj        = bj;
5053   merge->buf_ri    = buf_ri;
5054   merge->buf_rj    = buf_rj;
5055   merge->coi       = NULL;
5056   merge->coj       = NULL;
5057   merge->owners_co = NULL;
5058 
5059   PetscCall(PetscCommDestroy(&comm));
5060 
5061   /* attach the supporting struct to B_mpi for reuse */
5062   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5063   PetscCall(PetscContainerSetPointer(container, merge));
5064   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5065   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5066   PetscCall(PetscContainerDestroy(&container));
5067   *mpimat = B_mpi;
5068 
5069   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5070   PetscFunctionReturn(PETSC_SUCCESS);
5071 }
5072 
5073 /*@
5074   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5075   matrices from each processor
5076 
5077   Collective
5078 
5079   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
5081 . seqmat - the input sequential matrices
5082 . m      - number of local rows (or `PETSC_DECIDE`)
5083 . n      - number of local columns (or `PETSC_DECIDE`)
5084 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5085 
5086   Output Parameter:
5087 . mpimat - the parallel matrix generated
5088 
5089   Level: advanced
5090 
5091   Note:
5092   The dimensions of the sequential matrix in each processor MUST be the same.
5093   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5094   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5095 
5096 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5097 @*/
5098 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5099 {
5100   PetscMPIInt size;
5101 
5102   PetscFunctionBegin;
5103   PetscCallMPI(MPI_Comm_size(comm, &size));
5104   if (size == 1) {
5105     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5106     if (scall == MAT_INITIAL_MATRIX) {
5107       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5108     } else {
5109       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5110     }
5111     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5112     PetscFunctionReturn(PETSC_SUCCESS);
5113   }
5114   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5115   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5116   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5117   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5118   PetscFunctionReturn(PETSC_SUCCESS);
5119 }
5120 
5121 /*@
5122   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5123 
5124   Not Collective
5125 
5126   Input Parameter:
5127 . A - the matrix
5128 
5129   Output Parameter:
5130 . A_loc - the local sequential matrix generated
5131 
5132   Level: developer
5133 
5134   Notes:
5135   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5136   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5137   `n` is the global column count obtained with `MatGetSize()`
5138 
5139   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5140 
5141   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5142 
5143   Destroy the matrix with `MatDestroy()`
5144 
5145 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5146 @*/
5147 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5148 {
5149   PetscBool mpi;
5150 
5151   PetscFunctionBegin;
5152   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5153   if (mpi) {
5154     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5155   } else {
5156     *A_loc = A;
5157     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5158   }
5159   PetscFunctionReturn(PETSC_SUCCESS);
5160 }
5161 
5162 /*@
5163   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5164 
5165   Not Collective
5166 
5167   Input Parameters:
5168 + A     - the matrix
5169 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5170 
5171   Output Parameter:
5172 . A_loc - the local sequential matrix generated
5173 
5174   Level: developer
5175 
5176   Notes:
5177   The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5179   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5180 
5181   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5182 
5183   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5184   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5185   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5186   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5187 
5188 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5189 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;                       /* aa/ba walk the values; aav/bav keep the base pointers for the restore calls */
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* PetscStrbeginswith so derived types whose name starts with "mpiaij" (e.g. GPU variants) also pass */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single rank: the diagonal block already holds the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  /* ai/aj: diagonal block CSR; bi/bj: off-diagonal block CSR. aj/bj/aa/ba are
     advanced as cursors below, so row i always resumes where row i-1 stopped. */
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* build a merged CSR: per row, off-diag cols < cstart, then diag cols, then off-diag cols >= cstart,
       which keeps each row's global column indices sorted */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal entries with global column < cstart */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (shift local columns by cstart to make them global) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal entries (global column >= cstart) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* nonzero pattern is fixed; only copy the values in the same interleaved order.
       NOTE(review): assumes *A_loc has the structure produced by the MAT_INITIAL_MATRIX path
       (or an equivalent preallocation) — verify at call sites. */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal entries with global column < cstart */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* remaining off-diagonal entries */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  /* restore with the saved base pointers (aa/ba were advanced past the arrays) */
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5294 
5295 /*@
5296   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5297   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5298 
5299   Not Collective
5300 
5301   Input Parameters:
5302 + A     - the matrix
5303 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5304 
5305   Output Parameters:
5306 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5307 - A_loc - the local sequential matrix generated
5308 
5309   Level: developer
5310 
5311   Note:
5312   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5313   part, then those associated with the off-diagonal part (in its local ordering)
5314 
5315 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5316 @*/
5317 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5318 {
5319   Mat             Ao, Ad;
5320   const PetscInt *cmap;
5321   PetscMPIInt     size;
5322   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5323 
5324   PetscFunctionBegin;
5325   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5326   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5327   if (size == 1) {
5328     if (scall == MAT_INITIAL_MATRIX) {
5329       PetscCall(PetscObjectReference((PetscObject)Ad));
5330       *A_loc = Ad;
5331     } else if (scall == MAT_REUSE_MATRIX) {
5332       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5333     }
5334     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5335     PetscFunctionReturn(PETSC_SUCCESS);
5336   }
5337   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5338   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5339   if (f) {
5340     PetscCall((*f)(A, scall, glob, A_loc));
5341   } else {
5342     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5343     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5344     Mat_SeqAIJ        *c;
5345     PetscInt          *ai = a->i, *aj = a->j;
5346     PetscInt          *bi = b->i, *bj = b->j;
5347     PetscInt          *ci, *cj;
5348     const PetscScalar *aa, *ba;
5349     PetscScalar       *ca;
5350     PetscInt           i, j, am, dn, on;
5351 
5352     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5353     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5354     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5355     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5356     if (scall == MAT_INITIAL_MATRIX) {
5357       PetscInt k;
5358       PetscCall(PetscMalloc1(1 + am, &ci));
5359       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5360       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5361       ci[0] = 0;
5362       for (i = 0, k = 0; i < am; i++) {
5363         const PetscInt ncols_o = bi[i + 1] - bi[i];
5364         const PetscInt ncols_d = ai[i + 1] - ai[i];
5365         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5366         /* diagonal portion of A */
5367         for (j = 0; j < ncols_d; j++, k++) {
5368           cj[k] = *aj++;
5369           ca[k] = *aa++;
5370         }
5371         /* off-diagonal portion of A */
5372         for (j = 0; j < ncols_o; j++, k++) {
5373           cj[k] = dn + *bj++;
5374           ca[k] = *ba++;
5375         }
5376       }
5377       /* put together the new matrix */
5378       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5379       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5380       /* Since these are PETSc arrays, change flags to free them as necessary. */
5381       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5382       c->free_a  = PETSC_TRUE;
5383       c->free_ij = PETSC_TRUE;
5384       c->nonew   = 0;
5385       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5386     } else if (scall == MAT_REUSE_MATRIX) {
5387       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5388       for (i = 0; i < am; i++) {
5389         const PetscInt ncols_d = ai[i + 1] - ai[i];
5390         const PetscInt ncols_o = bi[i + 1] - bi[i];
5391         /* diagonal portion of A */
5392         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5393         /* off-diagonal portion of A */
5394         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5395       }
5396       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5397     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5398     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5399     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5400     if (glob) {
5401       PetscInt cst, *gidx;
5402 
5403       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5404       PetscCall(PetscMalloc1(dn + on, &gidx));
5405       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5406       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5407       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5408     }
5409   }
5410   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5411   PetscFunctionReturn(PETSC_SUCCESS);
5412 }
5413 
5414 /*@C
  MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5416 
5417   Not Collective
5418 
5419   Input Parameters:
5420 + A     - the matrix
5421 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5422 . row   - index set of rows to extract (or `NULL`)
5423 - col   - index set of columns to extract (or `NULL`)
5424 
5425   Output Parameter:
5426 . A_loc - the local sequential matrix generated
5427 
5428   Level: developer
5429 
5430 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5431 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row; /* borrowed; only ISes created here are destroyed below */
  }
  if (!col) {
    /* default column set: columns with nonzeros on this rank, in sorted global order:
       off-diag columns < cstart, then the owned columns, then off-diag columns >= cstart */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first off-diag column at or beyond the owned range */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    /* iscola takes ownership of idx */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* destroy only the ISes created here; compose above holds its own reference to iscola */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5484 
5485 /*
5486  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5487  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5488  * on a global size.
5489  * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  /* Gathers the (possibly remote) rows of P listed in 'rows' into a sequential
     matrix *P_oth with global column indices, using two PetscSFs (one for the
     diagonal block of P, one for the off-diagonal block). */
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per local row of P, record (diag nnz, off-diag nnz) pairs and their running offsets */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol); /* ncol = max row length, used as a column-count placeholder */
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* Build entry-level SF graphs: one leaf per nonzero entry to be fetched */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0; /* ntotalcols interleaves diag/off-diag so each P_oth row is stored contiguously */
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix; pd->j is mutated in place and
     shifted back after the broadcast has started (communication overlap) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is also translated in place to global indices, then mapped back below */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* undo the in-place translation of po->j; every index must map back */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5658 
5659 /*
5660  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5661  * This supports MPIAIJ and MAIJ
5662  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp; /* maps a "block key" (global off-diag column / dof) to its first-seen order */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; dof collapses blocks of dof
       consecutive columns (MAIJ case) to one key -- dof==1 is the plain MPIAIJ case */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense:
       duplicate keys must be adjacent so that 'count - 1' below is the right value */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step, so it maps to the same unique row */
        mapping[i] = count - 1;
      }
    }
    /* PETSC_OWN_POINTER: 'map' takes ownership of 'mapping'; do not free it here */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* Hash-map key order is unspecified, so sort to get ascending row indices */
    PetscCall(PetscSortInt(htsize, rowindices));
    /* PETSC_OWN_POINTER: 'rows' takes ownership of 'rowindices' */
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    /* Attach the A-offdiag-column -> P_oth-row mapping so later products can use it */
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place: both SFs scatter into the same p_oth->a array,
       the diag SF filling the diagonal-block entries, the offdiag SF the rest */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5735 
5736 /*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that correspond to nonzero columns of local `A`
5738 
5739   Collective
5740 
5741   Input Parameters:
5742 + A     - the first matrix in `MATMPIAIJ` format
5743 . B     - the second matrix in `MATMPIAIJ` format
5744 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5745 
5746   Output Parameters:
5747 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5748 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5749 - B_seq - the sequential matrix generated
5750 
5751   Level: developer
5752 
5753 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5754 @*/
5755 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5756 {
5757   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5758   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5759   IS          isrowb, iscolb;
5760   Mat        *bseq = NULL;
5761 
5762   PetscFunctionBegin;
5763   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5764              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5765   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5766 
5767   if (scall == MAT_INITIAL_MATRIX) {
5768     start = A->cmap->rstart;
5769     cmap  = a->garray;
5770     nzA   = a->A->cmap->n;
5771     nzB   = a->B->cmap->n;
5772     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5773     ncols = 0;
5774     for (i = 0; i < nzB; i++) { /* row < local row index */
5775       if (cmap[i] < start) idx[ncols++] = cmap[i];
5776       else break;
5777     }
5778     imark = i;
5779     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5780     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5781     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5782     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5783   } else {
5784     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5785     isrowb = *rowb;
5786     iscolb = *colb;
5787     PetscCall(PetscMalloc1(1, &bseq));
5788     bseq[0] = *B_seq;
5789   }
5790   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5791   *B_seq = bseq[0];
5792   PetscCall(PetscFree(bseq));
5793   if (!rowb) {
5794     PetscCall(ISDestroy(&isrowb));
5795   } else {
5796     *rowb = isrowb;
5797   }
5798   if (!colb) {
5799     PetscCall(ISDestroy(&iscolb));
5800   } else {
5801     *colb = iscolb;
5802   }
5803   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5804   PetscFunctionReturn(PETSC_SUCCESS);
5805 }
5806 
5807 /*
5808     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5809     of the OFF-DIAGONAL portion of local A
5810 
5811     Collective
5812 
5813    Input Parameters:
5814 +    A,B - the matrices in `MATMPIAIJ` format
5815 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5816 
   Output Parameters:
5818 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5819 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5820 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5821 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5822 
5823     Developer Note:
5824     This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5826 
5827     Level: developer
5828 
5829 */
5830 
5831 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5832 {
5833   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5834   VecScatter         ctx;
5835   MPI_Comm           comm;
5836   const PetscMPIInt *rprocs, *sprocs;
5837   PetscMPIInt        nrecvs, nsends;
5838   const PetscInt    *srow, *rstarts, *sstarts;
5839   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5840   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5841   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5842   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5843   PetscMPIInt        size, tag, rank, nreqs;
5844 
5845   PetscFunctionBegin;
5846   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5847   PetscCallMPI(MPI_Comm_size(comm, &size));
5848 
5849   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5850              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5851   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5852   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5853 
5854   if (size == 1) {
5855     startsj_s = NULL;
5856     bufa_ptr  = NULL;
5857     *B_oth    = NULL;
5858     PetscFunctionReturn(PETSC_SUCCESS);
5859   }
5860 
5861   ctx = a->Mvctx;
5862   tag = ((PetscObject)ctx)->tag;
5863 
5864   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5865   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5866   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5867   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5868   PetscCall(PetscMalloc1(nreqs, &reqs));
5869   rwaits = reqs;
5870   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5871 
5872   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5873   if (scall == MAT_INITIAL_MATRIX) {
5874     /* i-array */
5875     /*  post receives */
5876     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5877     for (i = 0; i < nrecvs; i++) {
5878       rowlen = rvalues + rstarts[i] * rbs;
5879       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5880       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5881     }
5882 
5883     /* pack the outgoing message */
5884     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5885 
5886     sstartsj[0] = 0;
5887     rstartsj[0] = 0;
5888     len         = 0; /* total length of j or a array to be sent */
5889     if (nsends) {
5890       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5891       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5892     }
5893     for (i = 0; i < nsends; i++) {
5894       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5895       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5896       for (j = 0; j < nrows; j++) {
5897         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5898         for (l = 0; l < sbs; l++) {
5899           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5900 
5901           rowlen[j * sbs + l] = ncols;
5902 
5903           len += ncols;
5904           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5905         }
5906         k++;
5907       }
5908       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5909 
5910       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5911     }
5912     /* recvs and sends of i-array are completed */
5913     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5914     PetscCall(PetscFree(svalues));
5915 
5916     /* allocate buffers for sending j and a arrays */
5917     PetscCall(PetscMalloc1(len + 1, &bufj));
5918     PetscCall(PetscMalloc1(len + 1, &bufa));
5919 
5920     /* create i-array of B_oth */
5921     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5922 
5923     b_othi[0] = 0;
5924     len       = 0; /* total length of j or a array to be received */
5925     k         = 0;
5926     for (i = 0; i < nrecvs; i++) {
5927       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5928       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5929       for (j = 0; j < nrows; j++) {
5930         b_othi[k + 1] = b_othi[k] + rowlen[j];
5931         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5932         k++;
5933       }
5934       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5935     }
5936     PetscCall(PetscFree(rvalues));
5937 
5938     /* allocate space for j and a arrays of B_oth */
5939     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5940     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5941 
5942     /* j-array */
5943     /*  post receives of j-array */
5944     for (i = 0; i < nrecvs; i++) {
5945       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5946       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5947     }
5948 
5949     /* pack the outgoing message j-array */
5950     if (nsends) k = sstarts[0];
5951     for (i = 0; i < nsends; i++) {
5952       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5953       bufJ  = bufj + sstartsj[i];
5954       for (j = 0; j < nrows; j++) {
5955         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5956         for (ll = 0; ll < sbs; ll++) {
5957           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5958           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5959           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5960         }
5961       }
5962       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5963     }
5964 
5965     /* recvs and sends of j-array are completed */
5966     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5967   } else if (scall == MAT_REUSE_MATRIX) {
5968     sstartsj = *startsj_s;
5969     rstartsj = *startsj_r;
5970     bufa     = *bufa_ptr;
5971     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5972   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5973 
5974   /* a-array */
5975   /*  post receives of a-array */
5976   for (i = 0; i < nrecvs; i++) {
5977     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5978     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5979   }
5980 
5981   /* pack the outgoing message a-array */
5982   if (nsends) k = sstarts[0];
5983   for (i = 0; i < nsends; i++) {
5984     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5985     bufA  = bufa + sstartsj[i];
5986     for (j = 0; j < nrows; j++) {
5987       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5988       for (ll = 0; ll < sbs; ll++) {
5989         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5990         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5991         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5992       }
5993     }
5994     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5995   }
5996   /* recvs and sends of a-array are completed */
5997   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5998   PetscCall(PetscFree(reqs));
5999 
6000   if (scall == MAT_INITIAL_MATRIX) {
6001     Mat_SeqAIJ *b_oth;
6002 
6003     /* put together the new matrix */
6004     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6005 
6006     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6007     /* Since these are PETSc arrays, change flags to free them as necessary. */
6008     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6009     b_oth->free_a  = PETSC_TRUE;
6010     b_oth->free_ij = PETSC_TRUE;
6011     b_oth->nonew   = 0;
6012 
6013     PetscCall(PetscFree(bufj));
6014     if (!startsj_s || !bufa_ptr) {
6015       PetscCall(PetscFree2(sstartsj, rstartsj));
6016       PetscCall(PetscFree(bufa_ptr));
6017     } else {
6018       *startsj_s = sstartsj;
6019       *startsj_r = rstartsj;
6020       *bufa_ptr  = bufa;
6021     }
6022   } else if (scall == MAT_REUSE_MATRIX) {
6023     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6024   }
6025 
6026   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6027   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6028   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6029   PetscFunctionReturn(PETSC_SUCCESS);
6030 }
6031 
6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6035 #if defined(PETSC_HAVE_MKL_SPARSE)
6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6037 #endif
6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6040 #if defined(PETSC_HAVE_ELEMENTAL)
6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6042 #endif
6043 #if defined(PETSC_HAVE_SCALAPACK)
6044 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6045 #endif
6046 #if defined(PETSC_HAVE_HYPRE)
6047 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6048 #endif
6049 #if defined(PETSC_HAVE_CUDA)
6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6051 #endif
6052 #if defined(PETSC_HAVE_HIP)
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6054 #endif
6055 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6057 #endif
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6059 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6060 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6061 
6062 /*
6063     Computes (B'*A')' since computing B*A directly is untenable
6064 
6065                n                       p                          p
6066         [             ]       [             ]         [                 ]
6067       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6068         [             ]       [             ]         [                 ]
6069 
6070 */
6071 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6072 {
6073   Mat At, Bt, Ct;
6074 
6075   PetscFunctionBegin;
6076   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6077   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6078   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6079   PetscCall(MatDestroy(&At));
6080   PetscCall(MatDestroy(&Bt));
6081   PetscCall(MatTransposeSetPrecursor(Ct, C));
6082   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6083   PetscCall(MatDestroy(&Ct));
6084   PetscFunctionReturn(PETSC_SUCCESS);
6085 }
6086 
6087 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6088 {
6089   PetscBool cisdense;
6090 
6091   PetscFunctionBegin;
6092   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6093   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6094   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6095   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6096   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6097   PetscCall(MatSetUp(C));
6098 
6099   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6100   PetscFunctionReturn(PETSC_SUCCESS);
6101 }
6102 
6103 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6104 {
6105   Mat_Product *product = C->product;
6106   Mat          A = product->A, B = product->B;
6107 
6108   PetscFunctionBegin;
6109   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6110              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6111   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6112   C->ops->productsymbolic = MatProductSymbolic_AB;
6113   PetscFunctionReturn(PETSC_SUCCESS);
6114 }
6115 
6116 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6117 {
6118   Mat_Product *product = C->product;
6119 
6120   PetscFunctionBegin;
6121   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6122   PetscFunctionReturn(PETSC_SUCCESS);
6123 }
6124 
6125 /*
6126    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6127 
6128   Input Parameters:
6129 
6130     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6131     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6132 
6133     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6134 
6135     For Set1, j1[] contains column indices of the nonzeros.
6136     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6138     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6139 
6140     Similar for Set2.
6141 
6142     This routine merges the two sets of nonzeros row by row and removes repeats.
6143 
6144   Output Parameters: (memory is allocated by the caller)
6145 
6146     i[],j[]: the CSR of the merged matrix, which has m rows.
6147     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6148     imap2[]: similar to imap1[], but for Set2.
6149     Note we order nonzeros row-by-row and from left to right.
6150 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge of the (sorted) column indices of row r from both sets;
       b1/b2 always point at the FIRST occurrence of the current unique nonzero, and jump
       over all of its repeats (jmap[t+1]-jmap[t] of them) in one step */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Unique nonzero only present in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Unique nonzero only present in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of these loops runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    /* Close row r of the merged CSR; errors if the count overflows PetscInt */
    PetscCall(PetscIntCast(t, i + r + 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6208 
6209 /*
6210   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6211 
6212   Input Parameters:
6213     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6214     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6215       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6216 
6217       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6218       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6219 
6220   Output Parameters:
6221     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6222     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6223       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6224       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6225 
6226     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6227       Atot: number of entries belonging to the diagonal block.
6228       Annz: number of unique nonzeros belonging to the diagonal block.
6229       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6230         repeats (i.e., same 'i,j' pair).
6231       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6232         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6233 
6234       Atot: number of entries belonging to the diagonal block
6235       Annz: number of unique nonzeros belonging to the diagonal block.
6236 
6237     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6238 
6239     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6240 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart; /* number of local rows */

  /* Skip negative rows (entries flagged to be ignored; they sort to the front) */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_INT_MAX, -1]; after sorting, all diag
       entries precede offdiag entries, and within each part the order is by column */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sort columns of this row, dragging perm[] along so original entry positions are tracked */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row (repeats of a column are adjacent after sorting) */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* p - q = number of repeats of this column */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6352 
6353 /*
6354   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6355 
6356   Input Parameters:
6357     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6358     nnz:  number of unique nonzeros in the merged matrix
6359     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6360     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6361 
6362   Output Parameter: (memory is allocated by the caller)
6363     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6364 
6365   Example:
6366     nnz1 = 4
6367     nnz  = 6
6368     imap = [1,3,4,5]
6369     jmap = [0,3,5,6,7]
6370    then,
6371     jmap_new = [0,0,3,3,5,6,7]
6372 */
6373 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6374 {
6375   PetscCount k, p;
6376 
6377   PetscFunctionBegin;
6378   jmap_new[0] = 0;
6379   p           = nnz;                /* p loops over jmap_new[] backwards */
6380   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6381     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6382   }
6383   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6384   PetscFunctionReturn(PETSC_SUCCESS);
6385 }
6386 
6387 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6388 {
6389   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6390 
6391   PetscFunctionBegin;
6392   PetscCall(PetscSFDestroy(&coo->sf));
6393   PetscCall(PetscFree(coo->Aperm1));
6394   PetscCall(PetscFree(coo->Bperm1));
6395   PetscCall(PetscFree(coo->Ajmap1));
6396   PetscCall(PetscFree(coo->Bjmap1));
6397   PetscCall(PetscFree(coo->Aimap2));
6398   PetscCall(PetscFree(coo->Bimap2));
6399   PetscCall(PetscFree(coo->Aperm2));
6400   PetscCall(PetscFree(coo->Bperm2));
6401   PetscCall(PetscFree(coo->Ajmap2));
6402   PetscCall(PetscFree(coo->Bjmap2));
6403   PetscCall(PetscFree(coo->Cperm1));
6404   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6405   PetscCall(PetscFree(coo));
6406   PetscFunctionReturn(PETSC_SUCCESS);
6407 }
6408 
6409 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6410 {
6411   MPI_Comm             comm;
6412   PetscMPIInt          rank, size;
6413   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6414   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6415   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6416   PetscContainer       container;
6417   MatCOOStruct_MPIAIJ *coo;
6418 
6419   PetscFunctionBegin;
6420   PetscCall(PetscFree(mpiaij->garray));
6421   PetscCall(VecDestroy(&mpiaij->lvec));
6422 #if defined(PETSC_USE_CTABLE)
6423   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6424 #else
6425   PetscCall(PetscFree(mpiaij->colmap));
6426 #endif
6427   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6428   mat->assembled     = PETSC_FALSE;
6429   mat->was_assembled = PETSC_FALSE;
6430 
6431   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6432   PetscCallMPI(MPI_Comm_size(comm, &size));
6433   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6434   PetscCall(PetscLayoutSetUp(mat->rmap));
6435   PetscCall(PetscLayoutSetUp(mat->cmap));
6436   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6437   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6438   PetscCall(MatGetLocalSize(mat, &m, &n));
6439   PetscCall(MatGetSize(mat, &M, &N));
6440 
6441   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6442   /* entries come first, then local rows, then remote rows.                     */
6443   PetscCount n1 = coo_n, *perm1;
6444   PetscInt  *i1 = coo_i, *j1 = coo_j;
6445 
6446   PetscCall(PetscMalloc1(n1, &perm1));
6447   for (k = 0; k < n1; k++) perm1[k] = k;
6448 
6449   /* Manipulate indices so that entries with negative row or col indices will have smallest
6450      row indices, local entries will have greater but negative row indices, and remote entries
6451      will have positive row indices.
6452   */
6453   for (k = 0; k < n1; k++) {
6454     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6455     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6456     else {
6457       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6458       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6459     }
6460   }
6461 
6462   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6463   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6464 
6465   /* Advance k to the first entry we need to take care of */
6466   for (k = 0; k < n1; k++)
6467     if (i1[k] > PETSC_INT_MIN) break;
6468   PetscCount i1start = k;
6469 
6470   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6471   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6472 
6473   /*           Send remote rows to their owner                                  */
6474   /* Find which rows should be sent to which remote ranks*/
6475   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6476   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6477   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6478   const PetscInt *ranges;
6479   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6480 
6481   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6482   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6483   for (k = rem; k < n1;) {
6484     PetscMPIInt owner;
6485     PetscInt    firstRow, lastRow;
6486 
6487     /* Locate a row range */
6488     firstRow = i1[k]; /* first row of this owner */
6489     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6490     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6491 
6492     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6493     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6494 
6495     /* All entries in [k,p) belong to this remote owner */
6496     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6497       PetscMPIInt *sendto2;
6498       PetscInt    *nentries2;
6499       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6500 
6501       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6502       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6503       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6504       PetscCall(PetscFree2(sendto, nentries2));
6505       sendto   = sendto2;
6506       nentries = nentries2;
6507       maxNsend = maxNsend2;
6508     }
6509     sendto[nsend] = owner;
6510     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6511     nsend++;
6512     k = p;
6513   }
6514 
6515   /* Build 1st SF to know offsets on remote to send data */
6516   PetscSF      sf1;
6517   PetscInt     nroots = 1, nroots2 = 0;
6518   PetscInt     nleaves = nsend, nleaves2 = 0;
6519   PetscInt    *offsets;
6520   PetscSFNode *iremote;
6521 
6522   PetscCall(PetscSFCreate(comm, &sf1));
6523   PetscCall(PetscMalloc1(nsend, &iremote));
6524   PetscCall(PetscMalloc1(nsend, &offsets));
6525   for (k = 0; k < nsend; k++) {
6526     iremote[k].rank  = sendto[k];
6527     iremote[k].index = 0;
6528     nleaves2 += nentries[k];
6529     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6530   }
6531   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6532   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6533   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6534   PetscCall(PetscSFDestroy(&sf1));
6535   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6536 
6537   /* Build 2nd SF to send remote COOs to their owner */
6538   PetscSF sf2;
6539   nroots  = nroots2;
6540   nleaves = nleaves2;
6541   PetscCall(PetscSFCreate(comm, &sf2));
6542   PetscCall(PetscSFSetFromOptions(sf2));
6543   PetscCall(PetscMalloc1(nleaves, &iremote));
6544   p = 0;
6545   for (k = 0; k < nsend; k++) {
6546     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6547     for (q = 0; q < nentries[k]; q++, p++) {
6548       iremote[p].rank = sendto[k];
6549       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6550     }
6551   }
6552   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6553 
6554   /* Send the remote COOs to their owner */
6555   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6556   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6557   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6558   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6559   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6560   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6561   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6562   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6563   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6564   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6565   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6566 
6567   PetscCall(PetscFree(offsets));
6568   PetscCall(PetscFree2(sendto, nentries));
6569 
6570   /* Sort received COOs by row along with the permutation array     */
6571   for (k = 0; k < n2; k++) perm2[k] = k;
6572   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6573 
6574   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6575   PetscCount *Cperm1;
6576   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6577   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6578   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6579   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6580 
6581   /* Support for HYPRE matrices, kind of a hack.
6582      Swap min column with diagonal so that diagonal values will go first */
6583   PetscBool hypre;
6584   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6585   if (hypre) {
6586     PetscInt *minj;
6587     PetscBT   hasdiag;
6588 
6589     PetscCall(PetscBTCreate(m, &hasdiag));
6590     PetscCall(PetscMalloc1(m, &minj));
6591     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6592     for (k = i1start; k < rem; k++) {
6593       if (j1[k] < cstart || j1[k] >= cend) continue;
6594       const PetscInt rindex = i1[k] - rstart;
6595       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6596       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6597     }
6598     for (k = 0; k < n2; k++) {
6599       if (j2[k] < cstart || j2[k] >= cend) continue;
6600       const PetscInt rindex = i2[k] - rstart;
6601       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6602       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6603     }
6604     for (k = i1start; k < rem; k++) {
6605       const PetscInt rindex = i1[k] - rstart;
6606       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6607       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6608       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6609     }
6610     for (k = 0; k < n2; k++) {
6611       const PetscInt rindex = i2[k] - rstart;
6612       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6613       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6614       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6615     }
6616     PetscCall(PetscBTDestroy(&hasdiag));
6617     PetscCall(PetscFree(minj));
6618   }
6619 
6620   /* Split local COOs and received COOs into diag/offdiag portions */
6621   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6622   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6623   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6624   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6625   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6626   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6627 
6628   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6629   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6630   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6631   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6632 
6633   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6634   PetscInt *Ai, *Bi;
6635   PetscInt *Aj, *Bj;
6636 
6637   PetscCall(PetscMalloc1(m + 1, &Ai));
6638   PetscCall(PetscMalloc1(m + 1, &Bi));
6639   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6640   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6641 
6642   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6643   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6644   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6645   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6646   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6647 
6648   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6649   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6650 
6651   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6652   /* expect nonzeros in A/B most likely have local contributing entries        */
6653   PetscInt    Annz = Ai[m];
6654   PetscInt    Bnnz = Bi[m];
6655   PetscCount *Ajmap1_new, *Bjmap1_new;
6656 
6657   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6658   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6659 
6660   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6661   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6662 
6663   PetscCall(PetscFree(Aimap1));
6664   PetscCall(PetscFree(Ajmap1));
6665   PetscCall(PetscFree(Bimap1));
6666   PetscCall(PetscFree(Bjmap1));
6667   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6668   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6669   PetscCall(PetscFree(perm1));
6670   PetscCall(PetscFree3(i2, j2, perm2));
6671 
6672   Ajmap1 = Ajmap1_new;
6673   Bjmap1 = Bjmap1_new;
6674 
6675   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6676   if (Annz < Annz1 + Annz2) {
6677     PetscInt *Aj_new;
6678     PetscCall(PetscMalloc1(Annz, &Aj_new));
6679     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6680     PetscCall(PetscFree(Aj));
6681     Aj = Aj_new;
6682   }
6683 
6684   if (Bnnz < Bnnz1 + Bnnz2) {
6685     PetscInt *Bj_new;
6686     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6687     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6688     PetscCall(PetscFree(Bj));
6689     Bj = Bj_new;
6690   }
6691 
6692   /* Create new submatrices for on-process and off-process coupling                  */
6693   PetscScalar     *Aa, *Ba;
6694   MatType          rtype;
6695   Mat_SeqAIJ      *a, *b;
6696   PetscObjectState state;
6697   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6698   PetscCall(PetscCalloc1(Bnnz, &Ba));
6699   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6700   if (cstart) {
6701     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6702   }
6703 
6704   PetscCall(MatGetRootType_Private(mat, &rtype));
6705 
6706   MatSeqXAIJGetOptions_Private(mpiaij->A);
6707   PetscCall(MatDestroy(&mpiaij->A));
6708   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6709   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6710   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6711 
6712   MatSeqXAIJGetOptions_Private(mpiaij->B);
6713   PetscCall(MatDestroy(&mpiaij->B));
6714   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6715   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6716   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6717 
6718   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6719   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6720   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6721   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6722 
6723   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6724   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6725   a->free_a  = PETSC_TRUE;
6726   a->free_ij = PETSC_TRUE;
6727   b->free_a  = PETSC_TRUE;
6728   b->free_ij = PETSC_TRUE;
6729   a->maxnz   = a->nz;
6730   b->maxnz   = b->nz;
6731 
6732   /* conversion must happen AFTER multiply setup */
6733   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6734   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6735   PetscCall(VecDestroy(&mpiaij->lvec));
6736   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6737 
6738   // Put the COO struct in a container and then attach that to the matrix
6739   PetscCall(PetscMalloc1(1, &coo));
6740   coo->n       = coo_n;
6741   coo->sf      = sf2;
6742   coo->sendlen = nleaves;
6743   coo->recvlen = nroots;
6744   coo->Annz    = Annz;
6745   coo->Bnnz    = Bnnz;
6746   coo->Annz2   = Annz2;
6747   coo->Bnnz2   = Bnnz2;
6748   coo->Atot1   = Atot1;
6749   coo->Atot2   = Atot2;
6750   coo->Btot1   = Btot1;
6751   coo->Btot2   = Btot2;
6752   coo->Ajmap1  = Ajmap1;
6753   coo->Aperm1  = Aperm1;
6754   coo->Bjmap1  = Bjmap1;
6755   coo->Bperm1  = Bperm1;
6756   coo->Aimap2  = Aimap2;
6757   coo->Ajmap2  = Ajmap2;
6758   coo->Aperm2  = Aperm2;
6759   coo->Bimap2  = Bimap2;
6760   coo->Bjmap2  = Bjmap2;
6761   coo->Bperm2  = Bperm2;
6762   coo->Cperm1  = Cperm1;
6763   // Allocate in preallocation. If not used, it has zero cost on host
6764   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6765   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6766   PetscCall(PetscContainerSetPointer(container, coo));
6767   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6768   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6769   PetscCall(PetscContainerDestroy(&container));
6770   PetscFunctionReturn(PETSC_SUCCESS);
6771 }
6772 
6773 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6774 {
6775   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6776   Mat                  A = mpiaij->A, B = mpiaij->B;
6777   PetscScalar         *Aa, *Ba;
6778   PetscScalar         *sendbuf, *recvbuf;
6779   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6780   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6781   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6782   const PetscCount    *Cperm1;
6783   PetscContainer       container;
6784   MatCOOStruct_MPIAIJ *coo;
6785 
6786   PetscFunctionBegin;
6787   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6788   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6789   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6790   sendbuf = coo->sendbuf;
6791   recvbuf = coo->recvbuf;
6792   Ajmap1  = coo->Ajmap1;
6793   Ajmap2  = coo->Ajmap2;
6794   Aimap2  = coo->Aimap2;
6795   Bjmap1  = coo->Bjmap1;
6796   Bjmap2  = coo->Bjmap2;
6797   Bimap2  = coo->Bimap2;
6798   Aperm1  = coo->Aperm1;
6799   Aperm2  = coo->Aperm2;
6800   Bperm1  = coo->Bperm1;
6801   Bperm2  = coo->Bperm2;
6802   Cperm1  = coo->Cperm1;
6803 
6804   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6805   PetscCall(MatSeqAIJGetArray(B, &Ba));
6806 
6807   /* Pack entries to be sent to remote */
6808   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6809 
6810   /* Send remote entries to their owner and overlap the communication with local computation */
6811   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6812   /* Add local entries to A and B */
6813   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6814     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6815     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6816     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6817   }
6818   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6819     PetscScalar sum = 0.0;
6820     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6821     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6822   }
6823   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6824 
6825   /* Add received remote entries to A and B */
6826   for (PetscCount i = 0; i < coo->Annz2; i++) {
6827     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6828   }
6829   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6830     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6831   }
6832   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6833   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6834   PetscFunctionReturn(PETSC_SUCCESS);
6835 }
6836 
6837 /*MC
6838    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6839 
6840    Options Database Keys:
6841 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6842 
6843    Level: beginner
6844 
6845    Notes:
6846    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6847     in this case the values associated with the rows and columns one passes in are set to zero
6848     in the matrix
6849 
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6852 
6853 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6854 M*/
/* Constructor for MATMPIAIJ: installs the function table, creates the stash for off-process
   entries, zero-initializes the per-object state, and registers the type-specific methods
   and conversion routines that other code looks up by name via PetscObjectQueryFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* struct copy of the shared MPIAIJ operations table */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Type-specific methods, discoverable by name on the object */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other matrix types; device/back-end ones are registered only when configured in */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  /* MatProduct and COO assembly hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6937 
6938 /*@
6939   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6940   and "off-diagonal" part of the matrix in CSR format.
6941 
6942   Collective
6943 
6944   Input Parameters:
6945 + comm - MPI communicator
6946 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6947 . n    - This value should be the same as the local size used in creating the
6948          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6949          calculated if `N` is given) For square matrices `n` is almost always `m`.
6950 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6951 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6952 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6953 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6954 . a    - matrix values
6955 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6956 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6957 - oa   - matrix values
6958 
6959   Output Parameter:
6960 . mat - the matrix
6961 
6962   Level: advanced
6963 
6964   Notes:
6965   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6966   must free the arrays once the matrix has been destroyed and not before.
6967 
6968   The `i` and `j` indices are 0 based
6969 
6970   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6971 
6972   This sets local rows and cannot be used to set off-processor values.
6973 
6974   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6975   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6976   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6977   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6978   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6979   communication if it is known that only local entries will be set.
6980 
6981 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6982           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6983 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* the split-array format needs a concrete local row count; PETSC_DECIDE (negative) is meaningless here */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCall(PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"));
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* mark as preallocated: the diagonal and off-diagonal blocks are built below
     directly from the user-provided CSR arrays, so no preallocation pass is needed */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* "diagonal" block uses local column ids (n columns); "off-diagonal" block uses
     global column ids, hence it is created with cmap->N columns. The user arrays
     are referenced, not copied (see MatCreateSeqAIJWithArrays()) */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* all entries are local by construction, so assembly needs no off-process communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  /* the nonzero pattern is owned by the user arrays; introducing new nonzero locations is an error */
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7012 
/* Product context attached to C->product->data by MatProductSymbolic_MPIAIJBACKEND():
   holds the intermediate sequential products and the COO index scaffolding that
   MatProductNumeric_MPIAIJBACKEND() uses to assemble their values into the final
   MPI matrix. Freed by MatDestroy_MatMatMPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* for AB: merge product->B's diag and off-diag blocks into one local matrix */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
7043 
7044 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7045 {
7046   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7047   PetscInt             i;
7048 
7049   PetscFunctionBegin;
7050   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7051   PetscCall(PetscFree(mmdata->bufa));
7052   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7053   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7054   PetscCall(MatDestroy(&mmdata->P_oth));
7055   PetscCall(MatDestroy(&mmdata->Bloc));
7056   PetscCall(PetscSFDestroy(&mmdata->sf));
7057   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7058   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7059   PetscCall(PetscFree(mmdata->own[0]));
7060   PetscCall(PetscFree(mmdata->own));
7061   PetscCall(PetscFree(mmdata->off[0]));
7062   PetscCall(PetscFree(mmdata->off));
7063   PetscCall(PetscFree(mmdata));
7064   PetscFunctionReturn(PETSC_SUCCESS);
7065 }
7066 
7067 /* Copy selected n entries with indices in idx[] of A to v[].
7068    If idx is NULL, copy the whole data array of A to v[]
7069  */
7070 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7071 {
7072   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7073 
7074   PetscFunctionBegin;
7075   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7076   if (f) {
7077     PetscCall((*f)(A, n, idx, v));
7078   } else {
7079     const PetscScalar *vv;
7080 
7081     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7082     if (n && idx) {
7083       PetscScalar    *w  = v;
7084       const PetscInt *oi = idx;
7085       PetscInt        j;
7086 
7087       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7088     } else {
7089       PetscCall(PetscArraycpy(v, vv, n));
7090     }
7091     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7092   }
7093   PetscFunctionReturn(PETSC_SUCCESS);
7094 }
7095 
/* Numeric phase: refresh the temporaries, run the numeric phase of every
   intermediate sequential product, then copy/scatter their values into C
   through the COO interface prepared by MatProductSymbolic_MPIAIJBACKEND() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* the symbolic phase may have left fresh values (reusesym set from api_user);
     any subsequent numeric call must refresh the temporaries above */
  mmdata->reusesym = PETSC_FALSE;

  /* run numeric phase of each intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* collect values: off[i+1]-off[i] entries go to other ranks (via coo_w),
     own[i+1]-own[i] entries stay local (via coo_v) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff;

    PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
    if (mmdata->mptmp[i]) continue; /* temporary products feed later products, not C */
    if (noff) {
      PetscInt nown;

      PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* gather remote contributions at the tail of coo_v, matching the COO layout set up in the symbolic phase */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7142 
7143 /* Support for Pt * A, A * P, or Pt * A * P */
7144 #define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase: decompose the requested MPI product into a short list of
   sequential products mp[] on the local diagonal/off-diagonal blocks, and build
   the COO index scaffolding (own/off/sf) that the numeric phase uses to
   assemble their values into C */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* when A is symmetric, A^T*B can be computed as the cheaper A*B */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine sizes of C and whether computed values must be scattered to other ranks */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: every row is owned locally */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  /* set up C with the sizes determined above; C inherits A's matrix type */
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* build the list of intermediate sequential products; each entry records how its
     local row/col ids map to global ids of C (rmapt/cmapt and rmapa/cmapa) */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth only feeds the next product below; its values never go into C directly */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* the SF maps each off-process (i,j) leaf to its owning rank's root */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscInt incoo_o;
    PetscCall(PetscIntCast(ncoo_o, &incoo_o));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF so the numeric phase can use the same code path */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7641 
/* Select the backend symbolic implementation for AB, AtB and PtAP products.
   With device support, a command-line option (per product type / API entry point)
   lets the user force the CPU code path; without device support the backend
   symbolic routine is only chosen if nothing forces a fallback. If no backend
   symbolic routine is selected, fall back to MatProductSetFromOptions_MPIAIJ(). */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* true when A and B have the same (device) type */
  PetscBool usecpu = PETSC_FALSE; /* user-requested CPU fallback */
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* only use the backend path when both operands live on the device (not bound to CPU)
     and share the same concrete type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on whether the user called the legacy API (MatMatMult etc.)
       or the MatProduct API */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7712 
7713 /*
7714    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7715 
7716    n - the number of block indices in cc[]
7717    cc - the block indices (must be large enough to contain the indices)
7718 */
7719 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7720 {
7721   PetscInt        cnt = -1, nidx, j;
7722   const PetscInt *idx;
7723 
7724   PetscFunctionBegin;
7725   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7726   if (nidx) {
7727     cnt     = 0;
7728     cc[cnt] = idx[0] / bs;
7729     for (j = 1; j < nidx; j++) {
7730       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7731     }
7732   }
7733   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7734   *n = cnt + 1;
7735   PetscFunctionReturn(PETSC_SUCCESS);
7736 }
7737 
7738 /*
7739     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7740 
7741     ncollapsed - the number of block indices
7742     collapsed - the block indices (must be large enough to contain the indices)
7743 */
7744 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7745 {
7746   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7747 
7748   PetscFunctionBegin;
7749   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7750   for (i = start + 1; i < start + bs; i++) {
7751     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7752     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7753     cprevtmp = cprev;
7754     cprev    = merged;
7755     merged   = cprevtmp;
7756   }
7757   *ncollapsed = nprev;
7758   if (collapsed) *collapsed = cprev;
7759   PetscFunctionReturn(PETSC_SUCCESS);
7760 }
7761 
7762 /*
7763  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7764 
 Input Parameters:
+ Amat - matrix
. symmetrize - make the result symmetric
. scale - scale with diagonal
. filter - when nonnegative, drop tolerance passed to MatFilter() on the resulting graph
. index_size - number of entries in index[]; 0 means use all rows/columns of each block
- index - block-local row/column indices used to compute each block's value when index_size > 0
7769 
7770  Output Parameter:
7771  . a_Gmat - output scalar graph >= 0
7772 
7773 */
7774 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7775 {
7776   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7777   MPI_Comm  comm;
7778   Mat       Gmat;
7779   PetscBool ismpiaij, isseqaij;
7780   Mat       a, b, c;
7781   MatType   jtype;
7782 
7783   PetscFunctionBegin;
7784   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7785   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7786   PetscCall(MatGetSize(Amat, &MM, &NN));
7787   PetscCall(MatGetBlockSize(Amat, &bs));
7788   nloc = (Iend - Istart) / bs;
7789 
7790   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7791   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7792   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7793 
7794   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7795   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7796      implementation */
7797   if (bs > 1) {
7798     PetscCall(MatGetType(Amat, &jtype));
7799     PetscCall(MatCreate(comm, &Gmat));
7800     PetscCall(MatSetType(Gmat, jtype));
7801     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7802     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7803     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7804       PetscInt  *d_nnz, *o_nnz;
7805       MatScalar *aa, val, *AA;
7806       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7807 
7808       if (isseqaij) {
7809         a = Amat;
7810         b = NULL;
7811       } else {
7812         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7813         a             = d->A;
7814         b             = d->B;
7815       }
7816       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7817       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7818       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7819         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7820         const PetscInt *cols1, *cols2;
7821 
7822         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7823           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7824           nnz[brow / bs] = nc2 / bs;
7825           if (nc2 % bs) ok = 0;
7826           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7827           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7828             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7829             if (nc1 != nc2) ok = 0;
7830             else {
7831               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7832                 if (cols1[jj] != cols2[jj]) ok = 0;
7833                 if (cols1[jj] % bs != jj % bs) ok = 0;
7834               }
7835             }
7836             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7837           }
7838           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7839           if (!ok) {
7840             PetscCall(PetscFree2(d_nnz, o_nnz));
7841             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7842             goto old_bs;
7843           }
7844         }
7845       }
7846       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7847       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7848       PetscCall(PetscFree2(d_nnz, o_nnz));
7849       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7850       // diag
7851       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7852         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7853 
7854         ai = aseq->i;
7855         n  = ai[brow + 1] - ai[brow];
7856         aj = aseq->j + ai[brow];
7857         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7858           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7859           val        = 0;
7860           if (index_size == 0) {
7861             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7862               aa = aseq->a + ai[brow + ii] + k;
7863               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7864                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7865               }
7866             }
7867           } else {                                            // use (index,index) value if provided
7868             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7869               PetscInt ii = index[iii];
7870               aa          = aseq->a + ai[brow + ii] + k;
7871               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7872                 PetscInt jj = index[jjj];
7873                 val += PetscAbs(PetscRealPart(aa[jj]));
7874               }
7875             }
7876           }
7877           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7878           AA[k / bs] = val;
7879         }
7880         grow = Istart / bs + brow / bs;
7881         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7882       }
7883       // off-diag
7884       if (ismpiaij) {
7885         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7886         const PetscScalar *vals;
7887         const PetscInt    *cols, *garray = aij->garray;
7888 
7889         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7890         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7891           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7892           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7893             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7894             AA[k / bs] = 0;
7895             AJ[cidx]   = garray[cols[k]] / bs;
7896           }
7897           nc = ncols / bs;
7898           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7899           if (index_size == 0) {
7900             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7901               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7902               for (PetscInt k = 0; k < ncols; k += bs) {
7903                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7904                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7905                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7906                 }
7907               }
7908               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7909             }
7910           } else {                                            // use (index,index) value if provided
7911             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7912               PetscInt ii = index[iii];
7913               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7914               for (PetscInt k = 0; k < ncols; k += bs) {
7915                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7916                   PetscInt jj = index[jjj];
7917                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7918                 }
7919               }
7920               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7921             }
7922           }
7923           grow = Istart / bs + brow / bs;
7924           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7925         }
7926       }
7927       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7928       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7929       PetscCall(PetscFree2(AA, AJ));
7930     } else {
7931       const PetscScalar *vals;
7932       const PetscInt    *idx;
7933       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7934     old_bs:
7935       /*
7936        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7937        */
7938       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7939       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7940       if (isseqaij) {
7941         PetscInt max_d_nnz;
7942 
7943         /*
7944          Determine exact preallocation count for (sequential) scalar matrix
7945          */
7946         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7947         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7948         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7949         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7950         PetscCall(PetscFree3(w0, w1, w2));
7951       } else if (ismpiaij) {
7952         Mat             Daij, Oaij;
7953         const PetscInt *garray;
7954         PetscInt        max_d_nnz;
7955 
7956         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7957         /*
7958          Determine exact preallocation count for diagonal block portion of scalar matrix
7959          */
7960         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7961         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7962         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7963         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7964         PetscCall(PetscFree3(w0, w1, w2));
7965         /*
7966          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7967          */
7968         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7969           o_nnz[jj] = 0;
7970           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7971             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7972             o_nnz[jj] += ncols;
7973             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7974           }
7975           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7976         }
7977       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7978       /* get scalar copy (norms) of matrix */
7979       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7980       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7981       PetscCall(PetscFree2(d_nnz, o_nnz));
7982       for (Ii = Istart; Ii < Iend; Ii++) {
7983         PetscInt dest_row = Ii / bs;
7984 
7985         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7986         for (jj = 0; jj < ncols; jj++) {
7987           PetscInt    dest_col = idx[jj] / bs;
7988           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7989 
7990           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7991         }
7992         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7993       }
7994       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7995       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7996     }
7997   } else {
7998     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7999     else {
8000       Gmat = Amat;
8001       PetscCall(PetscObjectReference((PetscObject)Gmat));
8002     }
8003     if (isseqaij) {
8004       a = Gmat;
8005       b = NULL;
8006     } else {
8007       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8008       a             = d->A;
8009       b             = d->B;
8010     }
8011     if (filter >= 0 || scale) {
8012       /* take absolute value of each entry */
8013       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8014         MatInfo      info;
8015         PetscScalar *avals;
8016 
8017         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8018         PetscCall(MatSeqAIJGetArray(c, &avals));
8019         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8020         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8021       }
8022     }
8023   }
8024   if (symmetrize) {
8025     PetscBool isset, issym;
8026 
8027     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8028     if (!isset || !issym) {
8029       Mat matTrans;
8030 
8031       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8032       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8033       PetscCall(MatDestroy(&matTrans));
8034     }
8035     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8036   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8037   if (scale) {
8038     /* scale c for all diagonal values = 1 or -1 */
8039     Vec diag;
8040 
8041     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8042     PetscCall(MatGetDiagonal(Gmat, diag));
8043     PetscCall(VecReciprocal(diag));
8044     PetscCall(VecSqrtAbs(diag));
8045     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8046     PetscCall(VecDestroy(&diag));
8047   }
8048   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8049   if (filter >= 0) {
8050     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8051     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8052   }
8053   *a_Gmat = Gmat;
8054   PetscFunctionReturn(PETSC_SUCCESS);
8055 }
8056 
8057 /*
8058     Special version for direct calls from Fortran
8059 */
8060 
8061 /* Change these macros so can be used in void function */
8062 /* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
/* On error: record the code in *_ierr (the Fortran ierr output argument) and return void */
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
/* Identical to the standard SETERRQ(), except it assigns to *_ierr and returns void */
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)
8079 
8080 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8081   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8082 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8083   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8084 #else
8085 #endif
/*
  matsetvaluesmpiaij_ - Fortran-callable direct version of MatSetValues() for MATMPIAIJ.

  All scalar arguments arrive as pointers (Fortran pass-by-reference); errors are
  reported through *_ierr using the PetscCall()/SETERRQ() macros redefined above,
  since this function returns void.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are ignored */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) { /* row owned by this process: insert directly */
        row      = im[i] - rstart;
        lastcol1 = -1;
        /* search-state variables for the diagonal-block (A) insert macro */
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        /* search-state variables for the off-diagonal-block (B) insert macro */
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v[] layout depends on the row-oriented option */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) { /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else { /* column in the off-diagonal block */
            if (mat->was_assembled) {
              /* map global column to local off-diagonal column via colmap (1-based entries, 0 = absent) */
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              /* column not present in the assembled pattern: disassemble so it can be added */
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) { /* off-process row: stash for communication at assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8199 
8200 /* Undefining these here since they were redefined from their original definition above! No
8201  * other PETSc functions should be defined past this point, as it is impossible to recover the
8202  * original definitions */
8203 #undef PetscCall
8204 #undef SETERRQ
8205