xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a68bbae58a07f2fb515cab24a67de1159d72e8a2)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14 #if defined(PETSC_USE_LOG)
15   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
16 #endif
17   PetscCall(MatStashDestroy_Private(&mat->stash));
18   PetscCall(VecDestroy(&aij->diag));
19   PetscCall(MatDestroy(&aij->A));
20   PetscCall(MatDestroy(&aij->B));
21 #if defined(PETSC_USE_CTABLE)
22   PetscCall(PetscHMapIDestroy(&aij->colmap));
23 #else
24   PetscCall(PetscFree(aij->colmap));
25 #endif
26   PetscCall(PetscFree(aij->garray));
27   PetscCall(VecDestroy(&aij->lvec));
28   PetscCall(VecScatterDestroy(&aij->Mvctx));
29   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
30   PetscCall(PetscFree(aij->ld));
31 
32   /* Free COO */
33   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
34 
35   PetscCall(PetscFree(mat->data));
36 
37   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
38   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
39 
40   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
50 #if defined(PETSC_HAVE_CUDA)
51   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
52 #endif
53 #if defined(PETSC_HAVE_HIP)
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
55 #endif
56 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
57   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
58 #endif
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
60 #if defined(PETSC_HAVE_ELEMENTAL)
61   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
62 #endif
63 #if defined(PETSC_HAVE_SCALAPACK)
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
65 #endif
66 #if defined(PETSC_HAVE_HYPRE)
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
69 #endif
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
76 #if defined(PETSC_HAVE_MKL_SPARSE)
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
78 #endif
79   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
80   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
82   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
84   PetscFunctionReturn(PETSC_SUCCESS);
85 }
86 
87 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
88 #define TYPE AIJ
89 #define TYPE_AIJ
90 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
91 #undef TYPE
92 #undef TYPE_AIJ
93 
/*
  MatGetRowIJ_MPIAIJ - Returns compressed-row (i,j) index arrays for this
  process by first merging the locally owned rows into a sequential matrix.

  The merged matrix B is composed on A under the key "MatGetRowIJ_MPIAIJ" so
  that MatRestoreRowIJ_MPIAIJ() can find it again; the composition keeps B
  alive, so the MatDestroy() below only drops this function's reference.
*/
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  /* stash B on A for the matching MatRestoreRowIJ_MPIAIJ() call */
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
105 
106 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
107 {
108   Mat B;
109 
110   PetscFunctionBegin;
111   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
112   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
113   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 /*MC
118    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
119 
120    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
121    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
122   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
123   for communicators controlling multiple processes.  It is recommended that you call both of
124   the above preallocation routines for simplicity.
125 
126    Options Database Key:
127 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
128 
129   Level: beginner
130 
131   Developer Note:
132     Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
133    enough exist.
134 
135 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
136 M*/
137 
138 /*MC
139    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
140 
141    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
142    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
143    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
144   for communicators controlling multiple processes.  It is recommended that you call both of
145   the above preallocation routines for simplicity.
146 
147    Options Database Key:
148 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
149 
150   Level: beginner
151 
152 .seealso: [](chapter_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
153 M*/
154 
155 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
156 {
157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
158 
159   PetscFunctionBegin;
160 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
161   A->boundtocpu = flg;
162 #endif
163   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
164   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
165 
166   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
167    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
168    * to differ from the parent matrix. */
169   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
170   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
171 
172   PetscFunctionReturn(PETSC_SUCCESS);
173 }
174 
175 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
176 {
177   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
178 
179   PetscFunctionBegin;
180   if (mat->A) {
181     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
182     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
183   }
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
/*
  MatFindNonzeroRows_MPIAIJ - Builds an index set (global numbering) of the
  locally owned rows that contain at least one stored nonzero value.

  If no process has an empty or all-zero row, *keptrows is left NULL,
  meaning every row is kept.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count local rows that are structurally empty or hold only zeros */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i]; /* entries in the diagonal block of row i */
    nb = ib[i + 1] - ib[i]; /* entries in the off-diagonal block of row i */
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* nonzero in diagonal block: row is kept */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1; /* nonzero in off-diagonal block: row is kept */
    }
    cnt++; /* all stored values in this row are zero */
  ok1:;
  }
  /* total number of zero rows over the whole matrix */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* second pass: record the global index of every kept row */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}
255 
256 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
257 {
258   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
259   PetscBool   cong;
260 
261   PetscFunctionBegin;
262   PetscCall(MatHasCongruentLayouts(Y, &cong));
263   if (Y->assembled && cong) {
264     PetscCall(MatDiagonalSet(aij->A, D, is));
265   } else {
266     PetscCall(MatDiagonalSet_Default(Y, D, is));
267   }
268   PetscFunctionReturn(PETSC_SUCCESS);
269 }
270 
271 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
272 {
273   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
274   PetscInt    i, rstart, nrows, *rows;
275 
276   PetscFunctionBegin;
277   *zrows = NULL;
278   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
279   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
280   for (i = 0; i < nrows; i++) rows[i] += rstart;
281   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
282   PetscFunctionReturn(PETSC_SUCCESS);
283 }
284 
/*
  MatGetColumnReductions_MPIAIJ - Computes a per-column reduction (1-, 2-, or
  infinity-norm, or sum/mean of real or imaginary parts) over all rows of the
  matrix.  reductions must have room for the global number of columns; every
  process receives the full result.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  /* work accumulates this process' partial reduction for every global column */
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): these paired get/restore calls appear to exist only to make
     sure the host-side value arrays (a_aij->a, b_aij->a, read directly below)
     are up to date — confirm against MatSeqAIJGetArrayRead() semantics */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* diagonal-block columns are offset by cmap->rstart; off-diagonal columns
     map through garray back to global indices */
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the per-process partials: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-processing: square root for the 2-norm, divide by global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
330 
331 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
332 {
333   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
334   IS              sis, gis;
335   const PetscInt *isis, *igis;
336   PetscInt        n, *iis, nsis, ngis, rstart, i;
337 
338   PetscFunctionBegin;
339   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
340   PetscCall(MatFindNonzeroRows(a->B, &gis));
341   PetscCall(ISGetSize(gis, &ngis));
342   PetscCall(ISGetSize(sis, &nsis));
343   PetscCall(ISGetIndices(sis, &isis));
344   PetscCall(ISGetIndices(gis, &igis));
345 
346   PetscCall(PetscMalloc1(ngis + nsis, &iis));
347   PetscCall(PetscArraycpy(iis, igis, ngis));
348   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
349   n = ngis + nsis;
350   PetscCall(PetscSortRemoveDupsInt(&n, iis));
351   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
352   for (i = 0; i < n; i++) iis[i] += rstart;
353   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
354 
355   PetscCall(ISRestoreIndices(sis, &isis));
356   PetscCall(ISRestoreIndices(gis, &igis));
357   PetscCall(ISDestroy(&sis));
358   PetscCall(ISDestroy(&gis));
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
366 a slightly higher hash table cost; without it it is not scalable (each processor
367 has an order N integer array) but is fast to access.
368 */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* number of off-diagonal columns on this process */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash table: global column + 1 -> local column + 1; the +1 shift lets a
     lookup result of 0 mean "column not present" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense array over all global columns (zeroed, so 0 means "not present");
     not scalable in memory but O(1) to access */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
385 
/*
  MatSetValues_SeqAIJ_A_Private - Inlined insertion of one (row, col, value)
  into the diagonal block A.  Relies on caller-scope state: the row pointers
  rp1/ap1, the search window [low1, high1), the cached lastcol1, nrow1/rmax1,
  and the nonew/ignorezeroentries flags.  orow/ocol are the original global
  indices, used only in error messages.  Jumps to a_noinsert when no new
  entry needs to be created.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    /* shrink the search window using the previously inserted column */ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* binary search down to a short run, then scan linearly */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        /* entry already exists: add or overwrite in place */ \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* ignored zero values never create a new entry (except on the diagonal) */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    /* nonew == 1: silently skip insertion of new nonzero locations */ \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    /* grow the row storage if this row is already full */ \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }
431 
/*
  MatSetValues_SeqAIJ_B_Private - Same insertion logic as
  MatSetValues_SeqAIJ_A_Private(), but for the off-diagonal block B, using the
  rp2/ap2/low2/high2/lastcol2/nrow2/rmax2 caller-scope state.  Unlike the A
  variant, ignored zeros are skipped unconditionally: true diagonal entries
  never live in the off-diagonal block, so no row != col test is needed.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    /* shrink the search window using the previously inserted column */ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        /* entry already exists: add or overwrite in place */ \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    /* nonew == 1: silently skip insertion of new nonzero locations */ \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }
476 
/*
  MatSetValuesRow_MPIAIJ - Replaces all stored values of one locally owned row
  with the values in v, which must be ordered by increasing global column:
  off-diagonal entries left of the diagonal block, then the diagonal block,
  then off-diagonal entries to the right.  No new nonzero locations are
  created.  row is a global row index.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to a local row index */
  /* l counts B entries whose global column lies left of the diagonal block
     (garray maps B's local columns back to global numbering) */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
514 
/*
  MatSetValues_MPIAIJ - Inserts or adds an m-by-n logically dense block of
  values into the matrix.  Entries in locally owned rows go directly into the
  diagonal (A) or off-diagonal (B) sequential block via the
  MatSetValues_SeqAIJ_{A,B}_Private() macros; entries in off-process rows are
  stashed for exchange during MatAssemblyBegin/End.  Negative row or column
  indices are silently ignored.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  /* per-row search-window state consumed by the insertion macros */
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the macro state for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are ignored by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compact local column numbering; translate
               the global column through the colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal location but B is frozen (nonew set) */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B uses global column numbering */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash the values for communication at assembly time */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
623 
624 /*
625     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
626     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
627     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
628 */
629 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
630 {
631   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
632   Mat         A      = aij->A; /* diagonal part of the matrix */
633   Mat         B      = aij->B; /* offdiagonal part of the matrix */
634   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
635   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
636   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
637   PetscInt   *ailen = a->ilen, *aj = a->j;
638   PetscInt   *bilen = b->ilen, *bj = b->j;
639   PetscInt    am          = aij->A->rmap->n, j;
640   PetscInt    diag_so_far = 0, dnz;
641   PetscInt    offd_so_far = 0, onz;
642 
643   PetscFunctionBegin;
644   /* Iterate over all rows of the matrix */
645   for (j = 0; j < am; j++) {
646     dnz = onz = 0;
647     /*  Iterate over all non-zero columns of the current row */
648     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
649       /* If column is in the diagonal */
650       if (mat_j[col] >= cstart && mat_j[col] < cend) {
651         aj[diag_so_far++] = mat_j[col] - cstart;
652         dnz++;
653       } else { /* off-diagonal entries */
654         bj[offd_so_far++] = mat_j[col];
655         onz++;
656       }
657     }
658     ailen[j] = dnz;
659     bilen[j] = onz;
660   }
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 /*
665     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
666     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
667     No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
668     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
669     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
670 */
671 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
672 {
673   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
674   Mat          A    = aij->A; /* diagonal part of the matrix */
675   Mat          B    = aij->B; /* offdiagonal part of the matrix */
676   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
677   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
678   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
679   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
680   PetscInt    *ailen = a->ilen, *aj = a->j;
681   PetscInt    *bilen = b->ilen, *bj = b->j;
682   PetscInt     am          = aij->A->rmap->n, j;
683   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
684   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
685   PetscScalar *aa = a->a, *ba = b->a;
686 
687   PetscFunctionBegin;
688   /* Iterate over all rows of the matrix */
689   for (j = 0; j < am; j++) {
690     dnz_row = onz_row = 0;
691     rowstart_offd     = full_offd_i[j];
692     rowstart_diag     = full_diag_i[j];
693     /*  Iterate over all non-zero columns of the current row */
694     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
695       /* If column is in the diagonal */
696       if (mat_j[col] >= cstart && mat_j[col] < cend) {
697         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
698         aa[rowstart_diag + dnz_row] = mat_a[col];
699         dnz_row++;
700       } else { /* off-diagonal entries */
701         bj[rowstart_offd + onz_row] = mat_j[col];
702         ba[rowstart_offd + onz_row] = mat_a[col];
703         onz_row++;
704       }
705     }
706     ailen[j] = dnz_row;
707     bilen[j] = onz_row;
708   }
709   PetscFunctionReturn(PETSC_SUCCESS);
710 }
711 
/*
    Retrieves an m x n block of values from an MPIAIJ matrix into v (row-major, v[i*n+j]).
    Only rows owned by this process are supported; requesting a non-local row errors.
    Negative row/column indices are skipped (the corresponding v entries are left untouched).
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block: shift to the block's local numbering */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* column lies in the off-diagonal block: translate global column -> local B column via colmap */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          /* colmap stores local index + 1 so that 0 can mean "absent"; undo the shift after lookup */
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 or a garray mismatch means this global column has no entry in B on this rank */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
746 
747 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
748 {
749   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
750   PetscInt    nstash, reallocs;
751 
752   PetscFunctionBegin;
753   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
754 
755   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
756   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
757   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
758   PetscFunctionReturn(PETSC_SUCCESS);
759 }
760 
/*
    Completes assembly of an MPIAIJ matrix: drains the stash of off-process entries,
    assembles the diagonal (A) and off-diagonal (B) sequential blocks, negotiates
    disassembly across ranks, and updates the collective nonzero state.
    Collective on the matrix's communicator.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries from other ranks until the stash is exhausted */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* row-access scratch space is invalid after assembly; it is rebuilt on demand */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal vector is stale now that values may have changed */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
839 
840 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
841 {
842   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
843 
844   PetscFunctionBegin;
845   PetscCall(MatZeroEntries(l->A));
846   PetscCall(MatZeroEntries(l->B));
847   PetscFunctionReturn(PETSC_SUCCESS);
848 }
849 
/*
    Zeroes the given (global) rows of an MPIAIJ matrix, optionally placing diag on the
    diagonal and fixing the right-hand side b from x. Rows may be specified by any rank;
    they are first mapped to their owners. Collective on the matrix's communicator.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the blocks' nonzero states so we can detect pattern changes afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* square/congruent layout: the diagonal entry lives in the diagonal block A */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* temporarily clear 'nonew' so MatSetValues below may create diagonal entries; restored afterwards */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists for rows beyond the column range */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}
923 
/*
    Zeroes the given (global) rows AND the matching columns of an MPIAIJ matrix,
    optionally placing diag on the diagonal and adjusting b from x.
    Rows may be specified by any rank; a PetscSF routes them to their owners.
    Collective on the matrix's communicator.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not requested" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r; /* lrows[0..len) now holds local row indices to zero */
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  /* build lmask: entries of the ghost (scatter-target) vector that correspond to zeroed columns */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* NOTE: ii is re-pointed at the compressed row starts here; ridx maps compressed rows to true local rows */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* move the known value to the right-hand side before zeroing the entry */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1041 
1042 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1043 {
1044   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1045   PetscInt    nt;
1046   VecScatter  Mvctx = a->Mvctx;
1047 
1048   PetscFunctionBegin;
1049   PetscCall(VecGetLocalSize(xx, &nt));
1050   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1051   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1052   PetscUseTypeMethod(a->A, mult, xx, yy);
1053   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1054   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1055   PetscFunctionReturn(PETSC_SUCCESS);
1056 }
1057 
1058 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1059 {
1060   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1064   PetscFunctionReturn(PETSC_SUCCESS);
1065 }
1066 
1067 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1068 {
1069   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1070   VecScatter  Mvctx = a->Mvctx;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1074   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1075   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1076   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1077   PetscFunctionReturn(PETSC_SUCCESS);
1078 }
1079 
1080 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1081 {
1082   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1083 
1084   PetscFunctionBegin;
1085   /* do nondiagonal part */
1086   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1087   /* do local part */
1088   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1089   /* add partial results together */
1090   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1091   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1092   PetscFunctionReturn(PETSC_SUCCESS);
1093 }
1094 
/*
    Tests whether Bmat equals Amat^T to within tol. First checks the diagonal blocks
    (cheap, local + one allreduce); only if those all pass does it extract and compare
    the off-diagonal blocks via MatCreateSubMatrices. Collective.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); /* uniprocessor: no off-diagonal block exists */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  /* notme = all global indices outside this rank's ownership range [first,last) */
  /* NOTE(review): notme is allocated with size N - last + first but the second fill loop runs
     to M; these agree only when M == N (square), which the transpose test effectively assumes
     for the row layout -- confirm for rectangular matrices */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  /* B's submatrix is taken with row/column index sets swapped relative to A's */
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1134 
/* Tests symmetry of A to within tol by checking whether A is its own transpose. */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1141 
1142 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1143 {
1144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1145 
1146   PetscFunctionBegin;
1147   /* do nondiagonal part */
1148   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1149   /* do local part */
1150   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1151   /* add partial results together */
1152   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1153   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1154   PetscFunctionReturn(PETSC_SUCCESS);
1155 }
1156 
1157 /*
1158   This only works correctly for square matrices where the subblock A->A is the
1159    diagonal block
1160 */
1161 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1162 {
1163   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1164 
1165   PetscFunctionBegin;
1166   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1167   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1168   PetscCall(MatGetDiagonal(a->A, v));
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1173 {
1174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1175 
1176   PetscFunctionBegin;
1177   PetscCall(MatScale(a->A, aa));
1178   PetscCall(MatScale(a->B, aa));
1179   PetscFunctionReturn(PETSC_SUCCESS);
1180 }
1181 
1182 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1183 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1184 {
1185   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1186 
1187   PetscFunctionBegin;
1188   PetscCall(PetscSFDestroy(&aij->coo_sf));
1189   PetscCall(PetscFree(aij->Aperm1));
1190   PetscCall(PetscFree(aij->Bperm1));
1191   PetscCall(PetscFree(aij->Ajmap1));
1192   PetscCall(PetscFree(aij->Bjmap1));
1193 
1194   PetscCall(PetscFree(aij->Aimap2));
1195   PetscCall(PetscFree(aij->Bimap2));
1196   PetscCall(PetscFree(aij->Aperm2));
1197   PetscCall(PetscFree(aij->Bperm2));
1198   PetscCall(PetscFree(aij->Ajmap2));
1199   PetscCall(PetscFree(aij->Bjmap2));
1200 
1201   PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
1202   PetscCall(PetscFree(aij->Cperm1));
1203   PetscFunctionReturn(PETSC_SUCCESS);
1204 }
1205 
1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1207 {
1208   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1209   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1210   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1211   const PetscInt    *garray = aij->garray;
1212   const PetscScalar *aa, *ba;
1213   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1214   PetscInt64         nz, hnz;
1215   PetscInt          *rowlens;
1216   PetscInt          *colidxs;
1217   PetscScalar       *matvals;
1218   PetscMPIInt        rank;
1219 
1220   PetscFunctionBegin;
1221   PetscCall(PetscViewerSetUp(viewer));
1222 
1223   M  = mat->rmap->N;
1224   N  = mat->cmap->N;
1225   m  = mat->rmap->n;
1226   rs = mat->rmap->rstart;
1227   cs = mat->cmap->rstart;
1228   nz = A->nz + B->nz;
1229 
1230   /* write matrix header */
1231   header[0] = MAT_FILE_CLASSID;
1232   header[1] = M;
1233   header[2] = N;
1234   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1235   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1236   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1237   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1238 
1239   /* fill in and store row lengths  */
1240   PetscCall(PetscMalloc1(m, &rowlens));
1241   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1242   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1243   PetscCall(PetscFree(rowlens));
1244 
1245   /* fill in and store column indices */
1246   PetscCall(PetscMalloc1(nz, &colidxs));
1247   for (cnt = 0, i = 0; i < m; i++) {
1248     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1249       if (garray[B->j[jb]] > cs) break;
1250       colidxs[cnt++] = garray[B->j[jb]];
1251     }
1252     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1253     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1254   }
1255   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1256   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1257   PetscCall(PetscFree(colidxs));
1258 
1259   /* fill in and store nonzero values */
1260   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1261   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1262   PetscCall(PetscMalloc1(nz, &matvals));
1263   for (cnt = 0, i = 0; i < m; i++) {
1264     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1265       if (garray[B->j[jb]] > cs) break;
1266       matvals[cnt++] = ba[jb];
1267     }
1268     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1269     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1270   }
1271   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1272   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1273   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1274   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1275   PetscCall(PetscFree(matvals));
1276 
1277   /* write block size option to the viewer's .info file */
1278   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1279   PetscFunctionReturn(PETSC_SUCCESS);
1280 }
1281 
1282 #include <petscdraw.h>
/*
    Views an MPIAIJ matrix on an ASCII, draw, binary, or socket viewer.
    ASCII info formats and binary output are handled in place; everything else
    falls through to gathering the whole matrix on rank 0 and viewing it there.
    Collective: every rank must call, even when only rank 0 prints/draws.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max nonzeros per rank */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    /* NOTE(review): format was already fetched above; this second PetscViewerGetFormat is redundant */
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank breakdown of sizes, nonzeros, memory, and inode usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocessor: the diagonal block IS the matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/cols; all other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1408 
1409 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1410 {
1411   PetscBool iascii, isdraw, issocket, isbinary;
1412 
1413   PetscFunctionBegin;
1414   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1415   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1418   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1419   PetscFunctionReturn(PETSC_SUCCESS);
1420 }
1421 
/*
   SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

   Only the SOR_LOCAL_* (processor-local) variants, SOR_APPLY_UPPER, and
   SOR_EISENSTAT are supported in parallel: each outer iteration scatters the
   current solution into the ghost vector, moves the off-diagonal (B)
   contribution over to the right-hand side, and then runs the sequential SOR
   kernel on the diagonal block A.  A true parallel SOR is not implemented.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector holding bb - B*x; allocated only when needed */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply only the upper-triangular part of the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* The work vector is needed unless a single sweep with a zero initial guess
     (and no Eisenstat trick) is requested, in which case B*x contributes nothing */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep: x starts at zero, so no ghost update or rhs correction is required */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* refresh the ghost values of x from the other processes */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* local backward sweep with zero initial guess */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily create and cache the diagonal, needed for the pointwise scaling below */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      /* bb1 = D*xx, using the cached diagonal */
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * bb1 */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    /* add in the off-diagonal contribution from the ghosted solution */
    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot information detected by the local sweeps */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1518 
/*
   Permute the rows and columns of an MPIAIJ matrix (see MatPermute()).
   PetscSF star forests are used to invert the row and column permutations,
   to translate the compressed ghost (off-diagonal) column indices to their
   permuted global indices, and to ship the per-row preallocation counts to
   the processes that own the destination rows.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  /* after the reduction, rdest[i] is the global destination index of local row i */
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  /* gcdest[k] = permuted global index of ghost column gcols[k] */
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count, for each local row, how many entries land in the diagonal (dnnz)
     vs off-diagonal (onnz) block of the destination row's owner */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the processes that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never assigned in this function, so this branch
     looks like dead code (presumably a leftover from an earlier version) — confirm */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1624 
1625 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1626 {
1627   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1628 
1629   PetscFunctionBegin;
1630   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1631   if (ghosts) *ghosts = aij->garray;
1632   PetscFunctionReturn(PETSC_SUCCESS);
1633 }
1634 
1635 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1636 {
1637   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1638   Mat            A = mat->A, B = mat->B;
1639   PetscLogDouble isend[5], irecv[5];
1640 
1641   PetscFunctionBegin;
1642   info->block_size = 1.0;
1643   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1644 
1645   isend[0] = info->nz_used;
1646   isend[1] = info->nz_allocated;
1647   isend[2] = info->nz_unneeded;
1648   isend[3] = info->memory;
1649   isend[4] = info->mallocs;
1650 
1651   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1652 
1653   isend[0] += info->nz_used;
1654   isend[1] += info->nz_allocated;
1655   isend[2] += info->nz_unneeded;
1656   isend[3] += info->memory;
1657   isend[4] += info->mallocs;
1658   if (flag == MAT_LOCAL) {
1659     info->nz_used      = isend[0];
1660     info->nz_allocated = isend[1];
1661     info->nz_unneeded  = isend[2];
1662     info->memory       = isend[3];
1663     info->mallocs      = isend[4];
1664   } else if (flag == MAT_GLOBAL_MAX) {
1665     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1666 
1667     info->nz_used      = irecv[0];
1668     info->nz_allocated = irecv[1];
1669     info->nz_unneeded  = irecv[2];
1670     info->memory       = irecv[3];
1671     info->mallocs      = irecv[4];
1672   } else if (flag == MAT_GLOBAL_SUM) {
1673     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1674 
1675     info->nz_used      = irecv[0];
1676     info->nz_allocated = irecv[1];
1677     info->nz_unneeded  = irecv[2];
1678     info->memory       = irecv[3];
1679     info->mallocs      = irecv[4];
1680   }
1681   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1682   info->fill_ratio_needed = 0;
1683   info->factor_mallocs    = 0;
1684   PetscFunctionReturn(PETSC_SUCCESS);
1685 }
1686 
/*
   Set a matrix option.  Most options are forwarded to both the diagonal (A)
   and off-diagonal (B) sequential blocks; a few only update local flags, and
   some are deliberately ignored or handled by the type-independent
   MatSetOption() wrapper.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that affect the sequential blocks: forward to both */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* remember the orientation locally (used by MatSetValues) and forward it */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, MatSetValues drops entries destined for other processes */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1740 
/*
   Return one global row of the matrix; must be paired with
   MatRestoreRow_MPIAIJ().  The requested row must be owned by this process.
   Entries from the diagonal (A) and off-diagonal (B) blocks are merged so
   that — assuming each block stores its column indices sorted — the output
   is sorted by global column index.  Results are returned through the
   persistent work arrays mat->rowvalues / mat->rowindices, so only one row
   may be "active" at a time.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      /* combined length of row i in both blocks */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* maps local off-diagonal columns to global indices */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* layout: [B entries left of diagonal block | all A entries | remaining B entries] */
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined by the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1824 
1825 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1826 {
1827   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1828 
1829   PetscFunctionBegin;
1830   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1831   aij->getrowactive = PETSC_FALSE;
1832   PetscFunctionReturn(PETSC_SUCCESS);
1833 }
1834 
/*
   Compute a matrix norm.  On a single process the sequential kernel is used
   directly; otherwise the Frobenius, one (max column sum), and infinity
   (max row sum) norms are assembled from the diagonal (A) and off-diagonal
   (B) blocks and reduced over the communicator.  The two (spectral) norm is
   not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, then a global sum and square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate per-global-column absolute sums in a dense array of length cmap->N */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        /* diagonal-block columns are local: shift by cstart to get global index */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* off-diagonal columns are compressed: garray gives the global index */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so a local max followed by a global max suffices */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1912 
/*
   Transpose an MPIAIJ matrix.  The diagonal block is transposed locally
   (fast path, no MatSetValues); the off-diagonal block entries are sent to
   their new owners with MatSetValues().  For MAT_INITIAL_MATRIX (or an
   in-place transpose) the result's preallocation is computed first, using a
   PetscSF reduction of the off-diagonal column counts onto the columns'
   owning processes.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation: entries per column of A become entries per row of B */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global: sum the counts onto the processes owning each global column */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* the transpose uses A's column layout for rows and row layout for columns */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    /* reusing a prior transpose: the pattern must match, so forbid new allocations */
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate the compressed local column indices of B to global indices */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* row i of the off-diagonal block becomes (part of) column `row` of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with the transpose */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2006 
2007 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
2008 {
2009   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2010   Mat         a = aij->A, b = aij->B;
2011   PetscInt    s1, s2, s3;
2012 
2013   PetscFunctionBegin;
2014   PetscCall(MatGetLocalSize(mat, &s2, &s3));
2015   if (rr) {
2016     PetscCall(VecGetLocalSize(rr, &s1));
2017     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
2018     /* Overlap communication with computation. */
2019     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2020   }
2021   if (ll) {
2022     PetscCall(VecGetLocalSize(ll, &s1));
2023     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2024     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
2025   }
2026   /* scale  the diagonal block */
2027   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2028 
2029   if (rr) {
2030     /* Do a scatter end and then right scale the off-diagonal block */
2031     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2032     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2033   }
2034   PetscFunctionReturn(PETSC_SUCCESS);
2035 }
2036 
2037 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2038 {
2039   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2040 
2041   PetscFunctionBegin;
2042   PetscCall(MatSetUnfactored(a->A));
2043   PetscFunctionReturn(PETSC_SUCCESS);
2044 }
2045 
2046 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2047 {
2048   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2049   Mat         a, b, c, d;
2050   PetscBool   flg;
2051 
2052   PetscFunctionBegin;
2053   a = matA->A;
2054   b = matA->B;
2055   c = matB->A;
2056   d = matB->B;
2057 
2058   PetscCall(MatEqual(a, c, &flg));
2059   if (flg) PetscCall(MatEqual(b, d, &flg));
2060   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2061   PetscFunctionReturn(PETSC_SUCCESS);
2062 }
2063 
2064 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2065 {
2066   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2067   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2068 
2069   PetscFunctionBegin;
2070   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2071   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2072     /* because of the column compression in the off-processor part of the matrix a->B,
2073        the number of columns in a->B and b->B may be different, hence we cannot call
2074        the MatCopy() directly on the two parts. If need be, we can provide a more
2075        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2076        then copying the submatrices */
2077     PetscCall(MatCopy_Basic(A, B, str));
2078   } else {
2079     PetscCall(MatCopy(a->A, b->A, str));
2080     PetscCall(MatCopy(a->B, b->B, str));
2081   }
2082   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2083   PetscFunctionReturn(PETSC_SUCCESS);
2084 }
2085 
2086 /*
2087    Computes the number of nonzeros per row needed for preallocation when X and Y
2088    have different nonzero structure.
2089 */
2090 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2091 {
2092   PetscInt i, j, k, nzx, nzy;
2093 
2094   PetscFunctionBegin;
2095   /* Set the number of nonzeros in the new matrix */
2096   for (i = 0; i < m; i++) {
2097     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2098     nzx    = xi[i + 1] - xi[i];
2099     nzy    = yi[i + 1] - yi[i];
2100     nnz[i] = 0;
2101     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2102       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2103       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2104       nnz[i]++;
2105     }
2106     for (; k < nzy; k++) nnz[i]++;
2107   }
2108   PetscFunctionReturn(PETSC_SUCCESS);
2109 }
2110 
2111 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2112 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2113 {
2114   PetscInt    m = Y->rmap->N;
2115   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2116   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2117 
2118   PetscFunctionBegin;
2119   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2120   PetscFunctionReturn(PETSC_SUCCESS);
2121 }
2122 
/*
   Y = Y + a*X.  With SAME_NONZERO_PATTERN the two sequential blocks are
   combined directly; with SUBSET_NONZERO_PATTERN the generic implementation
   is used; otherwise a new matrix preallocated for the union of the two
   sparsity patterns is filled and merged back into Y.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* count the union sparsity per row, separately for the diagonal and
       off-diagonal blocks (the blocks are sequential, so rmap->N here is
       presumably the local row count — NOTE(review): confirm) */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    /* the off-diagonal blocks use compressed column indices, so pass the local-to-global maps */
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    /* compute B = Y + a*X entry-wise, then let B take over Y's header */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2153 
2154 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2155 
2156 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2157 {
2158   PetscFunctionBegin;
2159   if (PetscDefined(USE_COMPLEX)) {
2160     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2161 
2162     PetscCall(MatConjugate_SeqAIJ(aij->A));
2163     PetscCall(MatConjugate_SeqAIJ(aij->B));
2164   }
2165   PetscFunctionReturn(PETSC_SUCCESS);
2166 }
2167 
2168 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2169 {
2170   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2171 
2172   PetscFunctionBegin;
2173   PetscCall(MatRealPart(a->A));
2174   PetscCall(MatRealPart(a->B));
2175   PetscFunctionReturn(PETSC_SUCCESS);
2176 }
2177 
2178 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2179 {
2180   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2181 
2182   PetscFunctionBegin;
2183   PetscCall(MatImaginaryPart(a->A));
2184   PetscCall(MatImaginaryPart(a->B));
2185   PetscFunctionReturn(PETSC_SUCCESS);
2186 }
2187 
2188 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2189 {
2190   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2191   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2192   PetscScalar       *va, *vv;
2193   Vec                vB, vA;
2194   const PetscScalar *vb;
2195 
2196   PetscFunctionBegin;
2197   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2198   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2199 
2200   PetscCall(VecGetArrayWrite(vA, &va));
2201   if (idx) {
2202     for (i = 0; i < m; i++) {
2203       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2204     }
2205   }
2206 
2207   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2208   PetscCall(PetscMalloc1(m, &idxb));
2209   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2210 
2211   PetscCall(VecGetArrayWrite(v, &vv));
2212   PetscCall(VecGetArrayRead(vB, &vb));
2213   for (i = 0; i < m; i++) {
2214     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2215       vv[i] = vb[i];
2216       if (idx) idx[i] = a->garray[idxb[i]];
2217     } else {
2218       vv[i] = va[i];
2219       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2220     }
2221   }
2222   PetscCall(VecRestoreArrayWrite(vA, &vv));
2223   PetscCall(VecRestoreArrayWrite(vA, &va));
2224   PetscCall(VecRestoreArrayRead(vB, &vb));
2225   PetscCall(PetscFree(idxb));
2226   PetscCall(VecDestroy(&vA));
2227   PetscCall(VecDestroy(&vB));
2228   PetscFunctionReturn(PETSC_SUCCESS);
2229 }
2230 
/* For each local row r, v[r] = the entry of smallest absolute value over the
   row's stored AND implicit entries, and (optionally) idx[r] = its global
   column.  Because the off-diagonal block B stores only the nonzero columns
   (compressed via garray), an absent ("hole") column contributes an implicit
   0.0 that can realize the minimum, so the code locates the first such hole
   per row. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;                 /* local rows / local (diagonal-block) columns */
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend; /* global column range owned by this rank */
  PetscInt          *cmap = mat->garray;                             /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; delegate, writing directly into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: every row is entirely implicit zeros */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1; /* no stored entry to point at */
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      /* no implicit zeros in this row; seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: it has at least one implicit 0.0, so the off-diagonal minimum in absolute value is exactly 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap.  The j-th candidate hole among
         off-diagonal columns is global column j when j < cstart, and j + n
         once past the owned diagonal range [cstart, cend). */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: first off-diagonal column is 0 (before the owned range)
             or cend (just after it) */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          /* NOTE(review): indexes cmap by the row length ncols, not by this
             row's last stored column bj[...]; verify against upstream PETSc */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries; keep whichever has the smaller magnitude */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal-block minima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal indices are local; shift to global */
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      /* tie: report the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2346 
/* For each local row r, v[r] = the (signed, real-part) minimum over the row's
   stored AND implicit entries, and (optionally) idx[r] = its global column.
   Mirrors MatGetRowMinAbs_MPIAIJ but compares real parts instead of
   magnitudes; an implicit 0.0 in the compressed off-diagonal block bounds the
   off-diagonal minimum from above by 0.0. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;                 /* local rows / local (diagonal-block) columns */
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend; /* global column range owned by this rank */
  PetscInt          *cmap = mat->garray;                             /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; delegate, writing directly into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: signal "no entry" with +infinity sentinel */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      /* no implicit zeros in this row; seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: it has at least one implicit 0.0, so the off-diagonal minimum is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap.  The j-th candidate hole among
         off-diagonal columns is global column j when j < cstart, and j + n
         once past the owned diagonal range [cstart, cend). */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: first off-diagonal column is 0 (before the owned range)
             or cend (just after it) */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          /* NOTE(review): indexes cmap by the row length ncols, not by this
             row's last stored column bj[...]; verify against upstream PETSc */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries; keep whichever has the smaller real part */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal-block minima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal indices are local; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: report the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2462 
/* For each local row r, v[r] = the (signed, real-part) maximum over the row's
   stored AND implicit entries, and (optionally) idx[r] = its global column.
   Mirrors MatGetRowMin_MPIAIJ with the comparison direction reversed; an
   implicit 0.0 in the compressed off-diagonal block bounds the off-diagonal
   maximum from below by 0.0. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;                 /* local rows / local (diagonal-block) columns */
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend; /* global column range owned by this rank */
  PetscInt          *cmap = mat->garray;                             /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; delegate, writing directly into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: signal "no entry" with -infinity sentinel */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      /* no implicit zeros in this row; seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap.  The j-th candidate hole among
         off-diagonal columns is global column j when j < cstart, and j + n
         once past the owned diagonal range [cstart, cend). */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: first off-diagonal column is 0 (before the owned range)
             or cend (just after it) */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          /* NOTE(review): indexes cmap by the row length ncols, not by this
             row's last stored column bj[...]; verify against upstream PETSc */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries; keep whichever has the larger real part */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal-block maxima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal indices are local; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: report the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2578 
2579 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2580 {
2581   Mat *dummy;
2582 
2583   PetscFunctionBegin;
2584   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2585   *newmat = *dummy;
2586   PetscCall(PetscFree(dummy));
2587   PetscFunctionReturn(PETSC_SUCCESS);
2588 }
2589 
2590 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2591 {
2592   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2593 
2594   PetscFunctionBegin;
2595   PetscCall(MatInvertBlockDiagonal(a->A, values));
2596   A->factorerrortype = a->A->factorerrortype;
2597   PetscFunctionReturn(PETSC_SUCCESS);
2598 }
2599 
2600 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2601 {
2602   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2603 
2604   PetscFunctionBegin;
2605   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2606   PetscCall(MatSetRandom(aij->A, rctx));
2607   if (x->assembled) {
2608     PetscCall(MatSetRandom(aij->B, rctx));
2609   } else {
2610     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2611   }
2612   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2613   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2614   PetscFunctionReturn(PETSC_SUCCESS);
2615 }
2616 
2617 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2618 {
2619   PetscFunctionBegin;
2620   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2621   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2622   PetscFunctionReturn(PETSC_SUCCESS);
2623 }
2624 
2625 /*@
2626    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2627 
2628    Not Collective
2629 
2630    Input Parameter:
2631 .    A - the matrix
2632 
2633    Output Parameter:
2634 .    nz - the number of nonzeros
2635 
2636  Level: advanced
2637 
2638 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `Mat`
2639 @*/
2640 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2641 {
2642   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2643   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2644 
2645   PetscFunctionBegin;
2646   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2647   PetscFunctionReturn(PETSC_SUCCESS);
2648 }
2649 
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective

   Input Parameters:
+    A - the matrix
-    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

 Level: advanced

.seealso: [](chapter_matrices), `Mat`, `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if one is composed on A;
     PetscTryMethod is a silent no-op for matrix types that lack it */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2669 
2670 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2671 {
2672   PetscBool sc = PETSC_FALSE, flg;
2673 
2674   PetscFunctionBegin;
2675   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2676   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2677   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2678   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2679   PetscOptionsHeadEnd();
2680   PetscFunctionReturn(PETSC_SUCCESS);
2681 }
2682 
/* Y <- Y + a*I.  Ensure the diagonal block has room for at least one entry
   per row before delegating to the generic implementation. */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: one diagonal entry per row suffices */
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    /* re-preallocating resets the new-nonzero policy flag, so save and restore it */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2699 
2700 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2701 {
2702   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2703 
2704   PetscFunctionBegin;
2705   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2706   PetscCall(MatMissingDiagonal(a->A, missing, d));
2707   if (d) {
2708     PetscInt rstart;
2709     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2710     *d += rstart;
2711   }
2712   PetscFunctionReturn(PETSC_SUCCESS);
2713 }
2714 
2715 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2716 {
2717   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2718 
2719   PetscFunctionBegin;
2720   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
2724 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
2725 {
2726   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2727 
2728   PetscFunctionBegin;
2729   PetscCall(MatEliminateZeros(a->A));
2730   PetscCall(MatEliminateZeros(a->B));
2731   PetscFunctionReturn(PETSC_SUCCESS);
2732 }
2733 
/* Virtual function table for MATMPIAIJ, installed into mat->ops at creation.
   Entries are positional (see struct _MatOps); the numbered comments mark slot
   indices.  A NULL slot means the operation is either unsupported for this
   type or provided through another mechanism (composed functions, product
   dispatch, etc.).  Do NOT reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};
2886 
2887 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2888 {
2889   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2890 
2891   PetscFunctionBegin;
2892   PetscCall(MatStoreValues(aij->A));
2893   PetscCall(MatStoreValues(aij->B));
2894   PetscFunctionReturn(PETSC_SUCCESS);
2895 }
2896 
2897 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2898 {
2899   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2900 
2901   PetscFunctionBegin;
2902   PetscCall(MatRetrieveValues(aij->A));
2903   PetscCall(MatRetrieveValues(aij->B));
2904   PetscFunctionReturn(PETSC_SUCCESS);
2905 }
2906 
/* Preallocate the diagonal (A) and off-diagonal (B) sequential blocks of an
   MPIAIJ matrix. Any previously built communication structures (column map,
   garray, local vector, scatter) are discarded; they are rebuilt at assembly.

   d_nz/d_nnz - nonzeros per row for the diagonal block (uniform / per-row)
   o_nz/o_nnz - nonzeros per row for the off-diagonal block (uniform / per-row) */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* hash-based MatSetValues() was in use; restore the cached operations table */
    PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* throw away stale off-process machinery from any earlier preallocation */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* off-diagonal block: local rows x all global columns (empty on one process) */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  /* diagonal block: local rows x local columns */
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2949 
/* Reset the matrix to its freshly-preallocated state: discard the off-process
   communication structures and reset the preallocation of both sequential
   blocks, so values can be reinserted from scratch. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* these are rebuilt during the next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2975 
/* Duplicate an MPIAIJ matrix: create a new matrix of the same type/sizes and
   copy (or not, per cpvalues) the values of the two sequential blocks, along
   with the off-process column map, garray, local vector and scatter. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  /* copy assembly/factor state; insertmode starts clean */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch is not copied */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share the layouts by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* duplicate the global-to-local column map, if it exists */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* duplicate the off-diagonal global column indices (one per column of B) */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  /* carry over composed functions (PetscObjectComposeFunction entries) */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3031 
3032 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3033 {
3034   PetscBool isbinary, ishdf5;
3035 
3036   PetscFunctionBegin;
3037   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3038   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3039   /* force binary viewer to load .info file if it has not yet done so */
3040   PetscCall(PetscViewerSetUp(viewer));
3041   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3042   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3043   if (isbinary) {
3044     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3045   } else if (ishdf5) {
3046 #if defined(PETSC_HAVE_HDF5)
3047     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3048 #else
3049     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3050 #endif
3051   } else {
3052     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3053   }
3054   PetscFunctionReturn(PETSC_SUCCESS);
3055 }
3056 
/* Load an MPIAIJ matrix from a PETSc binary viewer. The file holds a header
   [classid, M, N, nz], then all row lengths, then all column indices, then
   all values; each process reads its own share via PetscViewerBinaryReadAll(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format that MPIAIJ cannot read */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum converts the per-row lengths into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the local row offsets must sum to the file's nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3106 
/* Gather a parallel column IS into a sequential IS holding all selected columns.
   Not scalable because of ISAllGather() unless getting all columns, in which
   case the gather is skipped and an identity stride IS is returned instead. */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    /* local stride matches this process's column ownership range exactly */
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* MPI_MIN: gisstride is 1 only if EVERY process selects its full column range */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    /* general case: gather the whole iscol onto every process */
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3143 
3144 /*
3145  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3146  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3147 
3148  Input Parameters:
3149 +   mat - matrix
.   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
           i.e., mat->rstart <= isrow[i] < mat->rend
3152 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3153            i.e., mat->cstart <= iscol[i] < mat->cend
3154 
3155  Output Parameters:
3156 +   isrow_d - sequential row index set for retrieving mat->A
.   iscol_d - sequential column index set for retrieving mat->A
3158 .   iscol_o - sequential column index set for retrieving mat->B
3159 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3160  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;       /* x marks selected columns; cmap carries their submatrix column index */
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of the local iscol sizes */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d: local column indices into mat->A, same block size as iscol */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: row indices shifted to be local to this process */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries that stayed -1 were not selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* caller takes ownership of cmap1 (returned as *garray) */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3257 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed on the first call) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; Asub/Bsub ownership transfers to M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* keep only the iscol_o entries whose global column survived in subgarray */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3350 
/* Extract a parallel submatrix. Fast paths are taken when isrow (and possibly
   iscol) have the same processor distribution as mat; otherwise falls back to
   the non-scalable path that gathers iscol onto every process. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* which fast path (if any) was used is recorded on *newmat via composed objects */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices must fall inside this process's ownership range */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* MPI_LAND: the fast paths require the property to hold on every process */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted: fall through to the general path; iscol_local is reused there */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* compose iscol_local on newmat so MAT_REUSE_MATRIX calls can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3450 
3451 /*@C
3452      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3453          and "off-diagonal" part of the matrix in CSR format.
3454 
3455    Collective
3456 
3457    Input Parameters:
3458 +  comm - MPI communicator
3459 .  A - "diagonal" portion of matrix
3460 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3461 -  garray - global index of `B` columns
3462 
3463    Output Parameter:
3464 .   mat - the matrix, with input `A` as its local diagonal matrix
3465 
3466   Level: advanced
3467 
3468    Notes:
3469    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3470 
3471    `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3472 
3473 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3474 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local diagonal-block column counts */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's local column indices to global indices via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer array ownership: B must not free what Bnew now uses */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3545 
3546 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3547 
/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - Extracts the parallel submatrix mat[isrow, iscol] for the case
  where the requested rows keep the same parallel (row) distribution as mat, so each process works only
  with its locally owned rows and no rows need to be communicated.

  Input Parameters:
+ mat         - the original MATMPIAIJ matrix
. isrow       - local rows to extract
. iscol       - parallel IS of columns to extract
. iscol_local - sequential IS containing all requested column indices on this process; the implementation
                below requires it to be sorted (duplicates are permitted)
. call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
- newmat      - resulting parallel submatrix; for MAT_REUSE_MATRIX it must come from a previous
                MAT_INITIAL_MATRIX call here, which stashes "SubMatrix", "SubIScol" and "Subcmap"
                on the returned matrix for reuse
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the helper objects stashed on *newmat by the MAT_INITIAL_MATRIX branch below */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    /* refresh the sequential submatrix Msub with the current numerical values of mat */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* allcolumns must hold on every rank for the optimization to be usable */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      /* trivial column map: submatrix column i is global column i */
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      /* idx[]   - requested global columns that are present in this process's diagonal or off-diagonal part
         cmap1[] - for each kept column, its position in the requested column list (column index in submat) */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: advance through sorted garray to find j (relies on sorted iscol_local) */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap)); /* cmap[col in Msub] = column in the new global matrix */

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's column ownership range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    /* single allocation: dlens = first m entries, olens = second m entries */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* translate Msub's local column indices to global columns of the new matrix */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  /* NOTE(review): aa has been advanced past the start of the array here; restore appears to ignore the
     pointer value for SeqAIJ, but confirm against MatSeqAIJRestoreArrayRead() semantics */
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    /* composing transfers logical ownership to *newmat; local references are dropped immediately */
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3757 
3758 /*
3759     Not great since it makes two copies of the submatrix, first an SeqAIJ
3760   in local and then by concatenating the local matrices the end result.
3761   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3762 
3763   This requires a sequential iscol with all indices.
3764 */
/*
  MatCreateSubMatrix_MPIAIJ_nonscalable - Extracts the parallel submatrix mat[isrow, iscol] by first
  building a full sequential copy of the requested rows/columns on each process (hence "nonscalable")
  and then redistributing it into a new parallel matrix.

  Input Parameters:
+ mat    - the original MATMPIAIJ matrix
. isrow  - local rows to extract
. iscol  - sequential IS with ALL requested column indices (required by this algorithm)
. csize  - number of local columns for the new matrix, or PETSC_DECIDE
. call   - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX (reuses the "SubMatrix" stashed on *newmat)
- newmat - the resulting parallel submatrix
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the optimization applies only if every rank requested all columns */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's column ownership range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    /* single allocation: dlens = first m entries, olens = second m entries */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    /* cwork/vwork point at row i's column indices and values inside Mreuse's CSR arrays */
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  /* NOTE(review): aa has been advanced past the start of the array here; restore appears to ignore the
     pointer value for SeqAIJ, but confirm against MatSeqAIJRestoreArrayRead() semantics */
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3891 
3892 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3893 {
3894   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3895   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3896   const PetscInt *JJ;
3897   PetscBool       nooffprocentries;
3898   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3899 
3900   PetscFunctionBegin;
3901   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3902 
3903   PetscCall(PetscLayoutSetUp(B->rmap));
3904   PetscCall(PetscLayoutSetUp(B->cmap));
3905   m      = B->rmap->n;
3906   cstart = B->cmap->rstart;
3907   cend   = B->cmap->rend;
3908   rstart = B->rmap->rstart;
3909 
3910   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3911 
3912   if (PetscDefined(USE_DEBUG)) {
3913     for (i = 0; i < m; i++) {
3914       nnz = Ii[i + 1] - Ii[i];
3915       JJ  = J + Ii[i];
3916       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3917       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3918       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3919     }
3920   }
3921 
3922   for (i = 0; i < m; i++) {
3923     nnz     = Ii[i + 1] - Ii[i];
3924     JJ      = J + Ii[i];
3925     nnz_max = PetscMax(nnz_max, nnz);
3926     d       = 0;
3927     for (j = 0; j < nnz; j++) {
3928       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3929     }
3930     d_nnz[i] = d;
3931     o_nnz[i] = nnz - d;
3932   }
3933   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3934   PetscCall(PetscFree2(d_nnz, o_nnz));
3935 
3936   for (i = 0; i < m; i++) {
3937     ii = i + rstart;
3938     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3939   }
3940   nooffprocentries    = B->nooffprocentries;
3941   B->nooffprocentries = PETSC_TRUE;
3942   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3943   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3944   B->nooffprocentries = nooffprocentries;
3945 
3946   /* count number of entries below block diagonal */
3947   PetscCall(PetscFree(Aij->ld));
3948   PetscCall(PetscCalloc1(m, &ld));
3949   Aij->ld = ld;
3950   for (i = 0; i < m; i++) {
3951     nnz = Ii[i + 1] - Ii[i];
3952     j   = 0;
3953     while (j < nnz && J[j] < cstart) j++;
3954     ld[i] = j;
3955     J += nnz;
3956   }
3957 
3958   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3959   PetscFunctionReturn(PETSC_SUCCESS);
3960 }
3961 
3962 /*@
3963    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3964    (the default parallel PETSc format).
3965 
3966    Collective
3967 
3968    Input Parameters:
3969 +  B - the matrix
3970 .  i - the indices into j for the start of each local row (starts with zero)
3971 .  j - the column indices for each local row (starts with zero)
3972 -  v - optional values in the matrix
3973 
3974    Level: developer
3975 
3976    Notes:
3977        The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3978      thus you CANNOT change the matrix entries by changing the values of `v` after you have
3979      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3980 
3981        The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3982 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
3985     as shown
3986 
3987 .vb
3988         1 0 0
3989         2 0 3     P0
3990        -------
3991         4 5 6     P1
3992 
3993      Process0 [P0] rows_owned=[0,1]
3994         i =  {0,1,3}  [size = nrow+1  = 2+1]
3995         j =  {0,0,2}  [size = 3]
3996         v =  {1,2,3}  [size = 3]
3997 
3998      Process1 [P1] rows_owned=[2]
3999         i =  {0,3}    [size = nrow+1  = 1+1]
4000         j =  {0,1,2}  [size = 3]
4001         v =  {4,5,6}  [size = 3]
4002 .ve
4003 
4004 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
4005           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
4006 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if the matrix type provides one; no-op otherwise */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4013 
4014 /*@C
4015    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4016    (the default parallel PETSc format).  For good matrix assembly performance
4017    the user should preallocate the matrix storage by setting the parameters
4018    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4019 
4020    Collective
4021 
4022    Input Parameters:
4023 +  B - the matrix
4024 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4025            (same value is used for all local rows)
4026 .  d_nnz - array containing the number of nonzeros in the various rows of the
4027            DIAGONAL portion of the local submatrix (possibly different for each row)
4028            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4029            The size of this array is equal to the number of local rows, i.e 'm'.
4030            For matrices that will be factored, you must leave room for (and set)
4031            the diagonal entry even if it is zero.
4032 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4033            submatrix (same value is used for all local rows).
4034 -  o_nnz - array containing the number of nonzeros in the various rows of the
4035            OFF-DIAGONAL portion of the local submatrix (possibly different for
4036            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4037            structure. The size of this array is equal to the number
4038            of local rows, i.e 'm'.
4039 
4040    Usage:
4041    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4043    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4044    as follows
4045 
4046 .vb
4047             1  2  0  |  0  3  0  |  0  4
4048     Proc0   0  5  6  |  7  0  0  |  8  0
4049             9  0 10  | 11  0  0  | 12  0
4050     -------------------------------------
4051            13  0 14  | 15 16 17  |  0  0
4052     Proc1   0 18  0  | 19 20 21  |  0  0
4053             0  0  0  | 22 23  0  | 24  0
4054     -------------------------------------
4055     Proc2  25 26 27  |  0  0 28  | 29  0
4056            30  0  0  | 31 32 33  |  0 34
4057 .ve
4058 
4059    This can be represented as a collection of submatrices as
4060 .vb
4061       A B C
4062       D E F
4063       G H I
4064 .ve
4065 
4066    Where the submatrices A,B,C are owned by proc0, D,E,F are
4067    owned by proc1, G,H,I are owned by proc2.
4068 
4069    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4070    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4071    The 'M','N' parameters are 8,8, and have the same values on all procs.
4072 
4073    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4074    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4075    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4076    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4077    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
   matrix, and [DF] as another `MATSEQAIJ` matrix.
4079 
4080    When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4081    allocated for every row of the local diagonal submatrix, and `o_nz`
4082    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4084    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4085    In this case, the values of `d_nz`, `o_nz` are
4086 .vb
4087      proc0  dnz = 2, o_nz = 2
4088      proc1  dnz = 3, o_nz = 2
4089      proc2  dnz = 1, o_nz = 4
4090 .ve
4091    We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4092    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4094    34 values.
4095 
4096    When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4097    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4098    In the above case the values for `d_nnz`, `o_nnz` are
4099 .vb
4100      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4101      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4102      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4103 .ve
4104    Here the space allocated is sum of all the above values i.e 34, and
4105    hence pre-allocation is perfect.
4106 
4107    Level: intermediate
4108 
4109    Notes:
4110    If the *_nnz parameter is given then the *_nz parameter is ignored
4111 
4112    The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4113    storage.  The stored row and column indices begin with zero.
4114    See [Sparse Matrices](sec_matsparse) for details.
4115 
4116    The parallel matrix is partitioned such that the first m0 rows belong to
4117    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4118    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4119 
4120    The DIAGONAL portion of the local submatrix of a processor can be defined
4121    as the submatrix which is obtained by extraction the part corresponding to
4122    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4123    first row that belongs to the processor, r2 is the last row belonging to
4124    the this processor, and c1-c2 is range of indices of the local part of a
4125    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4126    common case of a square matrix, the row and column ranges are the same and
4127    the DIAGONAL part is also square. The remaining portion of the local
4128    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4129 
4130    If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4131 
4132    You can call `MatGetInfo()` to get information on how effective the preallocation was;
4133    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4134    You can also run with the option `-info` and look for messages with the string
4135    malloc in them to see if additional memory allocation was needed.
4136 
4137 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4138           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4139 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation if the matrix type provides one; no-op otherwise */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4148 
4149 /*@
4150      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4151          CSR format for the local rows.
4152 
4153    Collective
4154 
4155    Input Parameters:
4156 +  comm - MPI communicator
4157 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4158 .  n - This value should be the same as the local size used in creating the
4159        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4160        calculated if N is given) For square matrices n is almost always m.
4161 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4162 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4163 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4164 .   j - column indices
4165 -   a - optional matrix values
4166 
4167    Output Parameter:
4168 .   mat - the matrix
4169 
4170    Level: intermediate
4171 
4172    Notes:
4173        The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4174      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4175      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4176 
4177        The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4178 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
4181     as shown
4182 
4183        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4184 .vb
4185         1 0 0
4186         2 0 3     P0
4187        -------
4188         4 5 6     P1
4189 
4190      Process0 [P0] rows_owned=[0,1]
4191         i =  {0,1,3}  [size = nrow+1  = 2+1]
4192         j =  {0,0,2}  [size = 3]
4193         v =  {1,2,3}  [size = 3]
4194 
4195      Process1 [P1] rows_owned=[2]
4196         i =  {0,3}    [size = nrow+1  = 1+1]
4197         j =  {0,1,2}  [size = 3]
4198         v =  {4,5,6}  [size = 3]
4199 .ve
4200 
.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4202           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4203 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  /* create an empty MATMPIAIJ with the requested layout, then preallocate and fill from the CSR arrays */
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4216 
4217 /*@
4218      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4219      CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4220      from `MatCreateMPIAIJWithArrays()`
4221 
4222      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4223 
4224    Collective
4225 
4226    Input Parameters:
4227 +  mat - the matrix
4228 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4229 .  n - This value should be the same as the local size used in creating the
4230        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4231        calculated if N is given) For square matrices n is almost always m.
4232 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4233 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4234 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4235 .  J - column indices
4236 -  v - matrix values
4237 
4238    Level: deprecated
4239 
4240 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4241           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4242 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;
  PetscInt       *ld  = Aij->ld; /* per-row count of entries below the diagonal block, set when the matrix was created from CSR */

  PetscFunctionBegin;
  /* J, M and N are not used: the sparsity pattern must be identical to the creating call */
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  /* each CSR row of v is laid out as: [ldi off-diag entries left of the diagonal block]
     [md diagonal-block entries] [nnz-ldi-md off-diag entries right of the diagonal block];
     split it into the diagonal (ad) and off-diagonal (ao) storage of the MPIAIJ matrix */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i]; /* number of entries in row i of the diagonal block */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* values only moved within local blocks, so skip off-process communication during assembly */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4286 
4287 /*@
4288      MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4289 
4290    Collective
4291 
4292    Input Parameters:
4293 +  mat - the matrix
4294 -  v - matrix values, stored by row
4295 
4296    Level: intermediate
4297 
4298    Note:
4299    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4300 
4301 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4303 @*/
4304 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4305 {
4306   PetscInt        nnz, i, m;
4307   PetscBool       nooffprocentries;
4308   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4309   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4310   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4311   PetscScalar    *ad, *ao;
4312   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4313   PetscInt        ldi, Iii, md;
4314   PetscInt       *ld = Aij->ld;
4315 
4316   PetscFunctionBegin;
4317   m = mat->rmap->n;
4318 
4319   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4320   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4321   Iii = 0;
4322   for (i = 0; i < m; i++) {
4323     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4324     ldi = ld[i];
4325     md  = Adi[i + 1] - Adi[i];
4326     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4327     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4328     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4329     ad += md;
4330     ao += nnz - md;
4331     Iii += nnz;
4332   }
4333   nooffprocentries      = mat->nooffprocentries;
4334   mat->nooffprocentries = PETSC_TRUE;
4335   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4336   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4337   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4338   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4339   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4340   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4341   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4342   mat->nooffprocentries = nooffprocentries;
4343   PetscFunctionReturn(PETSC_SUCCESS);
4344 }
4345 
4346 /*@C
4347    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4348    (the default parallel PETSc format).  For good matrix assembly performance
4349    the user should preallocate the matrix storage by setting the parameters
4350    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4351 
4352    Collective
4353 
4354    Input Parameters:
4355 +  comm - MPI communicator
4356 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4357            This value should be the same as the local size used in creating the
4358            y vector for the matrix-vector product y = Ax.
4359 .  n - This value should be the same as the local size used in creating the
4360        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4361        calculated if N is given) For square matrices n is almost always m.
4362 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4363 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4364 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4365            (same value is used for all local rows)
4366 .  d_nnz - array containing the number of nonzeros in the various rows of the
4367            DIAGONAL portion of the local submatrix (possibly different for each row)
4368            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4369            The size of this array is equal to the number of local rows, i.e 'm'.
4370 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4371            submatrix (same value is used for all local rows).
4372 -  o_nnz - array containing the number of nonzeros in the various rows of the
4373            OFF-DIAGONAL portion of the local submatrix (possibly different for
4374            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4375            structure. The size of this array is equal to the number
4376            of local rows, i.e 'm'.
4377 
4378    Output Parameter:
4379 .  A - the matrix
4380 
4381    Options Database Keys:
4382 +  -mat_no_inode  - Do not use inodes
4383 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4384 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4385         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4386         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4387 
4388    Level: intermediate
4389 
4390    Notes:
4391    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4392    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4393    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4394 
4395    If the *_nnz parameter is given then the *_nz parameter is ignored
4396 
4397    The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4398    processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4399    storage requirements for this matrix.
4400 
4401    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4402    processor than it must be used on all processors that share the object for
4403    that argument.
4404 
4405    The user MUST specify either the local or global matrix dimensions
4406    (possibly both).
4407 
4408    The parallel matrix is partitioned across processors such that the
4409    first m0 rows belong to process 0, the next m1 rows belong to
4410    process 1, the next m2 rows belong to process 2 etc.. where
4411    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4412    values corresponding to [m x N] submatrix.
4413 
4414    The columns are logically partitioned with the n0 columns belonging
4415    to 0th partition, the next n1 columns belonging to the next
4416    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4417 
4418    The DIAGONAL portion of the local submatrix on any given processor
4419    is the submatrix corresponding to the rows and columns m,n
4420    corresponding to the given processor. i.e diagonal matrix on
4421    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4422    etc. The remaining portion of the local submatrix [m x (N-n)]
4423    constitute the OFF-DIAGONAL portion. The example below better
4424    illustrates this concept.
4425 
4426    For a square global matrix we define each processor's diagonal portion
4427    to be its local rows and the corresponding columns (a square submatrix);
4428    each processor's off-diagonal portion encompasses the remainder of the
4429    local matrix (a rectangular submatrix).
4430 
4431    If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4432 
4433    When calling this routine with a single process communicator, a matrix of
4434    type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4435    type of communicator, use the construction mechanism
4436 .vb
4437      MatCreate(...,&A);
4438      MatSetType(A,MATMPIAIJ);
4439      MatSetSizes(A, m,n,M,N);
4440      MatMPIAIJSetPreallocation(A,...);
4441 .ve
4442 
4443    By default, this format uses inodes (identical nodes) when possible.
4444    We search for consecutive rows with the same nonzero structure, thereby
4445    reusing matrix information to achieve increased efficiency.
4446 
4447    Usage:
4448    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4450    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4451    as follows
4452 
4453 .vb
4454             1  2  0  |  0  3  0  |  0  4
4455     Proc0   0  5  6  |  7  0  0  |  8  0
4456             9  0 10  | 11  0  0  | 12  0
4457     -------------------------------------
4458            13  0 14  | 15 16 17  |  0  0
4459     Proc1   0 18  0  | 19 20 21  |  0  0
4460             0  0  0  | 22 23  0  | 24  0
4461     -------------------------------------
4462     Proc2  25 26 27  |  0  0 28  | 29  0
4463            30  0  0  | 31 32 33  |  0 34
4464 .ve
4465 
4466    This can be represented as a collection of submatrices as
4467 
4468 .vb
4469       A B C
4470       D E F
4471       G H I
4472 .ve
4473 
4474    Where the submatrices A,B,C are owned by proc0, D,E,F are
4475    owned by proc1, G,H,I are owned by proc2.
4476 
4477    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4478    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4479    The 'M','N' parameters are 8,8, and have the same values on all procs.
4480 
4481    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4482    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4483    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4484    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4485    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
   matrix, and [DF] as another `MATSEQAIJ` matrix.
4487 
4488    When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4489    allocated for every row of the local diagonal submatrix, and `o_nz`
4490    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4492    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4493    In this case, the values of `d_nz`,`o_nz` are
4494 .vb
4495      proc0  dnz = 2, o_nz = 2
4496      proc1  dnz = 3, o_nz = 2
4497      proc2  dnz = 1, o_nz = 4
4498 .ve
4499    We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4500    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4502    34 values.
4503 
4504    When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4505    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4506    In the above case the values for d_nnz,o_nnz are
4507 .vb
4508      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4509      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4510      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4511 .ve
4512    Here the space allocated is sum of all the above values i.e 34, and
4513    hence pre-allocation is perfect.
4514 
4515 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4516           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4517 @*/
4518 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4519 {
4520   PetscMPIInt size;
4521 
4522   PetscFunctionBegin;
4523   PetscCall(MatCreate(comm, A));
4524   PetscCall(MatSetSizes(*A, m, n, M, N));
4525   PetscCallMPI(MPI_Comm_size(comm, &size));
4526   if (size > 1) {
4527     PetscCall(MatSetType(*A, MATMPIAIJ));
4528     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4529   } else {
4530     PetscCall(MatSetType(*A, MATSEQAIJ));
4531     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4532   }
4533   PetscFunctionReturn(PETSC_SUCCESS);
4534 }
4535 
4536 /*MC
4537     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4538 
4539     Synopsis:
4540     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4541 
4542     Not Collective
4543 
4544     Input Parameter:
4545 .   A - the `MATMPIAIJ` matrix
4546 
4547     Output Parameters:
4548 +   Ad - the diagonal portion of the matrix
4549 .   Ao - the off diagonal portion of the matrix
4550 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4551 -   ierr - error code
4552 
4553      Level: advanced
4554 
4555     Note:
4556     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4557 
4558 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4559 M*/
4560 
4561 /*MC
4562     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4563 
4564     Synopsis:
4565     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4566 
4567     Not Collective
4568 
4569     Input Parameters:
4570 +   A - the `MATMPIAIJ` matrix
4571 .   Ad - the diagonal portion of the matrix
4572 .   Ao - the off diagonal portion of the matrix
4573 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4574 -   ierr - error code
4575 
4576      Level: advanced
4577 
4578 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4579 M*/
4580 
4581 /*@C
4582   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4583 
4584   Not Collective
4585 
4586   Input Parameter:
4587 . A - The `MATMPIAIJ` matrix
4588 
4589   Output Parameters:
4590 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4591 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4592 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4593 
4594   Level: intermediate
4595 
4596   Note:
4597   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4599   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4600   local column numbers to global column numbers in the original matrix.
4601 
4602   Fortran Note:
4603   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4604 
4605 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4606 @*/
4607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4608 {
4609   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4610   PetscBool   flg;
4611 
4612   PetscFunctionBegin;
4613   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4614   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4615   if (Ad) *Ad = a->A;
4616   if (Ao) *Ao = a->B;
4617   if (colmap) *colmap = a->garray;
4618   PetscFunctionReturn(PETSC_SUCCESS);
4619 }
4620 
/* Concatenates (stacks by rank order) each process's sequential matrix inmat into one
   parallel matrix *outmat on comm. n is the local column count of the result (or
   PETSC_DECIDE); scall selects whether *outmat is created (MAT_INITIAL_MATRIX) or an
   existing structure is reused for a numeric-only update. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* prefix-sum of local row counts minus own count = this rank's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row to preallocate exactly */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocations are called; only the one matching the actual type of *outmat takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* every rank only sets its own rows below, so assembly needs no communication */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy the local rows of inmat into the owned rows of *outmat */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4672 
4673 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4674 {
4675   PetscMPIInt        rank;
4676   PetscInt           m, N, i, rstart, nnz;
4677   size_t             len;
4678   const PetscInt    *indx;
4679   PetscViewer        out;
4680   char              *name;
4681   Mat                B;
4682   const PetscScalar *values;
4683 
4684   PetscFunctionBegin;
4685   PetscCall(MatGetLocalSize(A, &m, NULL));
4686   PetscCall(MatGetSize(A, NULL, &N));
4687   /* Should this be the type of the diagonal block of A? */
4688   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4689   PetscCall(MatSetSizes(B, m, N, m, N));
4690   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4691   PetscCall(MatSetType(B, MATSEQAIJ));
4692   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4693   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4694   for (i = 0; i < m; i++) {
4695     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4696     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4697     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4698   }
4699   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4700   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4701 
4702   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4703   PetscCall(PetscStrlen(outfile, &len));
4704   PetscCall(PetscMalloc1(len + 6, &name));
4705   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4706   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4707   PetscCall(PetscFree(name));
4708   PetscCall(MatView(B, out));
4709   PetscCall(PetscViewerDestroy(&out));
4710   PetscCall(MatDestroy(&B));
4711   PetscFunctionReturn(PETSC_SUCCESS);
4712 }
4713 
4714 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4715 {
4716   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4717 
4718   PetscFunctionBegin;
4719   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4720   PetscCall(PetscFree(merge->id_r));
4721   PetscCall(PetscFree(merge->len_s));
4722   PetscCall(PetscFree(merge->len_r));
4723   PetscCall(PetscFree(merge->bi));
4724   PetscCall(PetscFree(merge->bj));
4725   PetscCall(PetscFree(merge->buf_ri[0]));
4726   PetscCall(PetscFree(merge->buf_ri));
4727   PetscCall(PetscFree(merge->buf_rj[0]));
4728   PetscCall(PetscFree(merge->buf_rj));
4729   PetscCall(PetscFree(merge->coi));
4730   PetscCall(PetscFree(merge->coj));
4731   PetscCall(PetscFree(merge->owners_co));
4732   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4733   PetscCall(PetscFree(merge));
4734   PetscFunctionReturn(PETSC_SUCCESS);
4735 }
4736 
4737 #include <../src/mat/utils/freespace.h>
4738 #include <petscbt.h>
4739 
/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): sums the numerical values of the
   per-process sequential matrices seqmat (each containing rows destined for every
   rank) into the parallel matrix mpimat, whose nonzero structure and communication
   pattern were built by MatCreateMPIAIJSumSeqAIJSymbolic() and stored in the
   Mat_Merge_SeqsToMPI context composed on mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge context created by the symbolic phase; without it there is
     no communication pattern to reuse, so this is a plumbing error */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a; /* aa is a cursor into the value array; a_a keeps the base for the restore */

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* the values of all rows owned by [proc] are contiguous in aa, starting at ai[owners[proc]] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat; ba_i is a dense scratch row of the maximum possible width N */
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index of local row i */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* both column lists are sorted and aj is a subset of bj_i, so a single sweep
       of j suffices; the loop ends once all anzi source entries are placed */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] is the contiguous storage behind the array of receive-buffer pointers */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4858 
4859 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4860 {
4861   Mat                  B_mpi;
4862   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4863   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4864   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4865   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4866   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4867   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4868   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4869   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4870   MPI_Status          *status;
4871   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4872   PetscBT              lnkbt;
4873   Mat_Merge_SeqsToMPI *merge;
4874   PetscContainer       container;
4875 
4876   PetscFunctionBegin;
4877   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4878 
4879   /* make sure it is a PETSc comm */
4880   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4881   PetscCallMPI(MPI_Comm_size(comm, &size));
4882   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4883 
4884   PetscCall(PetscNew(&merge));
4885   PetscCall(PetscMalloc1(size, &status));
4886 
4887   /* determine row ownership */
4888   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4889   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4890   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4891   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4892   PetscCall(PetscLayoutSetUp(merge->rowmap));
4893   PetscCall(PetscMalloc1(size, &len_si));
4894   PetscCall(PetscMalloc1(size, &merge->len_s));
4895 
4896   m      = merge->rowmap->n;
4897   owners = merge->rowmap->range;
4898 
4899   /* determine the number of messages to send, their lengths */
4900   len_s = merge->len_s;
4901 
4902   len          = 0; /* length of buf_si[] */
4903   merge->nsend = 0;
4904   for (proc = 0; proc < size; proc++) {
4905     len_si[proc] = 0;
4906     if (proc == rank) {
4907       len_s[proc] = 0;
4908     } else {
4909       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4910       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4911     }
4912     if (len_s[proc]) {
4913       merge->nsend++;
4914       nrows = 0;
4915       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4916         if (ai[i + 1] > ai[i]) nrows++;
4917       }
4918       len_si[proc] = 2 * (nrows + 1);
4919       len += len_si[proc];
4920     }
4921   }
4922 
4923   /* determine the number and length of messages to receive for ij-structure */
4924   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4925   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4926 
4927   /* post the Irecv of j-structure */
4928   PetscCall(PetscCommGetNewTag(comm, &tagj));
4929   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4930 
4931   /* post the Isend of j-structure */
4932   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4933 
4934   for (proc = 0, k = 0; proc < size; proc++) {
4935     if (!len_s[proc]) continue;
4936     i = owners[proc];
4937     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4938     k++;
4939   }
4940 
4941   /* receives and sends of j-structure are complete */
4942   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4943   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4944 
4945   /* send and recv i-structure */
4946   PetscCall(PetscCommGetNewTag(comm, &tagi));
4947   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4948 
4949   PetscCall(PetscMalloc1(len + 1, &buf_s));
4950   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4951   for (proc = 0, k = 0; proc < size; proc++) {
4952     if (!len_s[proc]) continue;
4953     /* form outgoing message for i-structure:
4954          buf_si[0]:                 nrows to be sent
4955                [1:nrows]:           row index (global)
4956                [nrows+1:2*nrows+1]: i-structure index
4957     */
4958     nrows       = len_si[proc] / 2 - 1;
4959     buf_si_i    = buf_si + nrows + 1;
4960     buf_si[0]   = nrows;
4961     buf_si_i[0] = 0;
4962     nrows       = 0;
4963     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4964       anzi = ai[i + 1] - ai[i];
4965       if (anzi) {
4966         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4967         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4968         nrows++;
4969       }
4970     }
4971     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4972     k++;
4973     buf_si += len_si[proc];
4974   }
4975 
4976   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4977   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4978 
4979   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4980   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4981 
4982   PetscCall(PetscFree(len_si));
4983   PetscCall(PetscFree(len_ri));
4984   PetscCall(PetscFree(rj_waits));
4985   PetscCall(PetscFree2(si_waits, sj_waits));
4986   PetscCall(PetscFree(ri_waits));
4987   PetscCall(PetscFree(buf_s));
4988   PetscCall(PetscFree(status));
4989 
4990   /* compute a local seq matrix in each processor */
4991   /* allocate bi array and free space for accumulating nonzero column info */
4992   PetscCall(PetscMalloc1(m + 1, &bi));
4993   bi[0] = 0;
4994 
4995   /* create and initialize a linked list */
4996   nlnk = N + 1;
4997   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4998 
4999   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5000   len = ai[owners[rank + 1]] - ai[owners[rank]];
5001   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
5002 
5003   current_space = free_space;
5004 
5005   /* determine symbolic info for each local row */
5006   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
5007 
5008   for (k = 0; k < merge->nrecv; k++) {
5009     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5010     nrows       = *buf_ri_k[k];
5011     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5012     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5013   }
5014 
5015   MatPreallocateBegin(comm, m, n, dnz, onz);
5016   len = 0;
5017   for (i = 0; i < m; i++) {
5018     bnzi = 0;
5019     /* add local non-zero cols of this proc's seqmat into lnk */
5020     arow = owners[rank] + i;
5021     anzi = ai[arow + 1] - ai[arow];
5022     aj   = a->j + ai[arow];
5023     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5024     bnzi += nlnk;
5025     /* add received col data into lnk */
5026     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5027       if (i == *nextrow[k]) {            /* i-th row */
5028         anzi = *(nextai[k] + 1) - *nextai[k];
5029         aj   = buf_rj[k] + *nextai[k];
5030         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5031         bnzi += nlnk;
5032         nextrow[k]++;
5033         nextai[k]++;
5034       }
5035     }
5036     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5037 
5038     /* if free space is not available, make more free space */
5039     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5040     /* copy data into free space, then initialize lnk */
5041     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5042     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5043 
5044     current_space->array += bnzi;
5045     current_space->local_used += bnzi;
5046     current_space->local_remaining -= bnzi;
5047 
5048     bi[i + 1] = bi[i] + bnzi;
5049   }
5050 
5051   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5052 
5053   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5054   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5055   PetscCall(PetscLLDestroy(lnk, lnkbt));
5056 
5057   /* create symbolic parallel matrix B_mpi */
5058   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5059   PetscCall(MatCreate(comm, &B_mpi));
5060   if (n == PETSC_DECIDE) {
5061     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5062   } else {
5063     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5064   }
5065   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5066   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5067   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5068   MatPreallocateEnd(dnz, onz);
5069   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5070 
5071   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5072   B_mpi->assembled = PETSC_FALSE;
5073   merge->bi        = bi;
5074   merge->bj        = bj;
5075   merge->buf_ri    = buf_ri;
5076   merge->buf_rj    = buf_rj;
5077   merge->coi       = NULL;
5078   merge->coj       = NULL;
5079   merge->owners_co = NULL;
5080 
5081   PetscCall(PetscCommDestroy(&comm));
5082 
5083   /* attach the supporting struct to B_mpi for reuse */
5084   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5085   PetscCall(PetscContainerSetPointer(container, merge));
5086   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5087   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5088   PetscCall(PetscContainerDestroy(&container));
5089   *mpimat = B_mpi;
5090 
5091   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5092   PetscFunctionReturn(PETSC_SUCCESS);
5093 }
5094 
5095 /*@C
5096       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5097                  matrices from each processor
5098 
5099     Collective
5100 
5101    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each process
5104 .    m - number of local rows (or `PETSC_DECIDE`)
5105 .    n - number of local columns (or `PETSC_DECIDE`)
5106 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5107 
5108    Output Parameter:
5109 .    mpimat - the parallel matrix generated
5110 
5111     Level: advanced
5112 
5113    Note:
5114      The dimensions of the sequential matrix in each processor MUST be the same.
5115      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5116      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5117 
.seealso: [](chapter_matrices), `Mat`, `MatCreateAIJ()`
5119 @*/
5120 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5121 {
5122   PetscMPIInt size;
5123 
5124   PetscFunctionBegin;
5125   PetscCallMPI(MPI_Comm_size(comm, &size));
5126   if (size == 1) {
5127     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5128     if (scall == MAT_INITIAL_MATRIX) {
5129       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5130     } else {
5131       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5132     }
5133     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5134     PetscFunctionReturn(PETSC_SUCCESS);
5135   }
5136   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5137   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5138   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5139   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5140   PetscFunctionReturn(PETSC_SUCCESS);
5141 }
5142 
5143 /*@
5144      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking its local rows and putting them into a sequential matrix with
5145           mlocal rows and n columns. Where mlocal is obtained with `MatGetLocalSize()` and n is the global column count obtained
5146           with `MatGetSize()`
5147 
5148     Not Collective
5149 
5150    Input Parameter:
5151 .    A - the matrix
5152 
5153    Output Parameter:
5154 .    A_loc - the local sequential matrix generated
5155 
5156     Level: developer
5157 
5158    Notes:
5159      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5160 
5161      Destroy the matrix with `MatDestroy()`
5162 
5163 .seealso: [](chapter_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5164 @*/
5165 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5166 {
5167   PetscBool mpi;
5168 
5169   PetscFunctionBegin;
5170   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5171   if (mpi) {
5172     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5173   } else {
5174     *A_loc = A;
5175     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5176   }
5177   PetscFunctionReturn(PETSC_SUCCESS);
5178 }
5179 
5180 /*@
5181      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5182           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5183           with `MatGetSize()`
5184 
5185     Not Collective
5186 
5187    Input Parameters:
5188 +    A - the matrix
5189 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5190 
5191    Output Parameter:
5192 .    A_loc - the local sequential matrix generated
5193 
5194     Level: developer
5195 
5196    Notes:
5197      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5198 
5199      When the communicator associated with `A` has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A`.
5200      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called.
5201      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5202      modify the values of the returned `A_loc`.
5203 
5204 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5205 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* garray: block-local off-diagonal column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block already holds the entire local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  /* CSR index arrays of the diagonal (a) and off-diagonal (b) blocks */
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the merged matrix gets every entry of row i of both blocks */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* interleave each row so the global column indices come out in order:
       off-diagonal entries left of the owned column range, then the diagonal
       block (shifted by cstart), then the remaining off-diagonal entries */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break; /* rest of the off-diagonal entries go after the diagonal block */
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* nonzero structure is fixed; copy values only, walking in the same
       interleaved order as the MAT_INITIAL_MATRIX pass above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5310 
5311 /*@
5312      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5313           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5314 
5315     Not Collective
5316 
5317    Input Parameters:
5318 +    A - the matrix
5319 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5320 
5321    Output Parameters:
5322 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5323 -    A_loc - the local sequential matrix generated
5324 
5325     Level: developer
5326 
5327    Note:
5328      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5329      part, then those associated with the off diagonal part (in its local ordering)
5330 
5331 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5332 @*/
5333 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5334 {
5335   Mat             Ao, Ad;
5336   const PetscInt *cmap;
5337   PetscMPIInt     size;
5338   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5339 
5340   PetscFunctionBegin;
5341   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5342   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5343   if (size == 1) {
5344     if (scall == MAT_INITIAL_MATRIX) {
5345       PetscCall(PetscObjectReference((PetscObject)Ad));
5346       *A_loc = Ad;
5347     } else if (scall == MAT_REUSE_MATRIX) {
5348       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5349     }
5350     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5351     PetscFunctionReturn(PETSC_SUCCESS);
5352   }
5353   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5354   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5355   if (f) {
5356     PetscCall((*f)(A, scall, glob, A_loc));
5357   } else {
5358     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5359     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5360     Mat_SeqAIJ        *c;
5361     PetscInt          *ai = a->i, *aj = a->j;
5362     PetscInt          *bi = b->i, *bj = b->j;
5363     PetscInt          *ci, *cj;
5364     const PetscScalar *aa, *ba;
5365     PetscScalar       *ca;
5366     PetscInt           i, j, am, dn, on;
5367 
5368     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5369     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5370     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5371     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5372     if (scall == MAT_INITIAL_MATRIX) {
5373       PetscInt k;
5374       PetscCall(PetscMalloc1(1 + am, &ci));
5375       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5376       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5377       ci[0] = 0;
5378       for (i = 0, k = 0; i < am; i++) {
5379         const PetscInt ncols_o = bi[i + 1] - bi[i];
5380         const PetscInt ncols_d = ai[i + 1] - ai[i];
5381         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5382         /* diagonal portion of A */
5383         for (j = 0; j < ncols_d; j++, k++) {
5384           cj[k] = *aj++;
5385           ca[k] = *aa++;
5386         }
5387         /* off-diagonal portion of A */
5388         for (j = 0; j < ncols_o; j++, k++) {
5389           cj[k] = dn + *bj++;
5390           ca[k] = *ba++;
5391         }
5392       }
5393       /* put together the new matrix */
5394       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5395       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5396       /* Since these are PETSc arrays, change flags to free them as necessary. */
5397       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5398       c->free_a  = PETSC_TRUE;
5399       c->free_ij = PETSC_TRUE;
5400       c->nonew   = 0;
5401       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5402     } else if (scall == MAT_REUSE_MATRIX) {
5403       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5404       for (i = 0; i < am; i++) {
5405         const PetscInt ncols_d = ai[i + 1] - ai[i];
5406         const PetscInt ncols_o = bi[i + 1] - bi[i];
5407         /* diagonal portion of A */
5408         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5409         /* off-diagonal portion of A */
5410         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5411       }
5412       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5413     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5414     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5415     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5416     if (glob) {
5417       PetscInt cst, *gidx;
5418 
5419       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5420       PetscCall(PetscMalloc1(dn + on, &gidx));
5421       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5422       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5423       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5424     }
5425   }
5426   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5427   PetscFunctionReturn(PETSC_SUCCESS);
5428 }
5429 
5430 /*@C
5431      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5432 
5433     Not Collective
5434 
5435    Input Parameters:
5436 +    A - the matrix
5437 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5438 .    row - index set of rows to extract (or `NULL`)
5439 -    col - index set of columns to extract (or `NULL`)
5440 
5441    Output Parameter:
5442 .    A_loc - the local sequential matrix generated
5443 
5444     Level: developer
5445 
5446 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5447 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: owned columns plus the nonzero off-process columns
       (a->garray), merged so the global indices remain sorted — garray entries
       below cstart come first, then the owned range, then the rest */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* off-process columns left of the owned range */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* owned columns */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* remaining off-process columns */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* on reuse, MatCreateSubMatrices() expects a caller-allocated matrix array */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5500 
/*
 * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted once the row is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-root counts and offsets are stored as (diag, offdiag) pairs, hence the 2x sizes */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    /* pnnz[i] = total nonzeros of requested row i; ncol tracks the max as a safe column bound */
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* build entry-level SF graphs: each leaf is one nonzero of P_oth, each root
   * one nonzero of the owner's diag (sf) or off-diag (osf) block */
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix */
  /* NOTE: pd->j is mutated in place and converted back below, to avoid a temporary copy */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is likewise converted to global indices in place and mapped back after the broadcasts */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5674 
/*
 * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A.
 * This supports MPIAIJ and MAIJ.
 * */
5679 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5680 {
5681   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5682   Mat_SeqAIJ *p_oth;
5683   IS          rows, map;
5684   PetscHMapI  hamp;
5685   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5686   MPI_Comm    comm;
5687   PetscSF     sf, osf;
5688   PetscBool   has;
5689 
5690   PetscFunctionBegin;
5691   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5692   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5693   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5694    *  and then create a submatrix (that often is an overlapping matrix)
5695    * */
5696   if (reuse == MAT_INITIAL_MATRIX) {
5697     /* Use a hash table to figure out unique keys */
5698     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5699     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5700     count = 0;
5701     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5702     for (i = 0; i < a->B->cmap->n; i++) {
5703       key = a->garray[i] / dof;
5704       PetscCall(PetscHMapIHas(hamp, key, &has));
5705       if (!has) {
5706         mapping[i] = count;
5707         PetscCall(PetscHMapISet(hamp, key, count++));
5708       } else {
5709         /* Current 'i' has the same value the previous step */
5710         mapping[i] = count - 1;
5711       }
5712     }
5713     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5714     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5715     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5716     PetscCall(PetscCalloc1(htsize, &rowindices));
5717     off = 0;
5718     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5719     PetscCall(PetscHMapIDestroy(&hamp));
5720     PetscCall(PetscSortInt(htsize, rowindices));
5721     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5722     /* In case, the matrix was already created but users want to recreate the matrix */
5723     PetscCall(MatDestroy(P_oth));
5724     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5725     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5726     PetscCall(ISDestroy(&map));
5727     PetscCall(ISDestroy(&rows));
5728   } else if (reuse == MAT_REUSE_MATRIX) {
5729     /* If matrix was already created, we simply update values using SF objects
5730      * that as attached to the matrix earlier.
5731      */
5732     const PetscScalar *pd_a, *po_a;
5733 
5734     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5735     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5736     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5737     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5738     /* Update values in place */
5739     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5740     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5741     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5742     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5743     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5744     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5745     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5746     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5747   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5748   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5749   PetscFunctionReturn(PETSC_SUCCESS);
5750 }
5751 
5752 /*@C
5753   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5754 
5755   Collective
5756 
5757   Input Parameters:
5758 + A - the first matrix in `MATMPIAIJ` format
5759 . B - the second matrix in `MATMPIAIJ` format
5760 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5761 
5762   Output Parameters:
+ rowb - on input, the index set of rows of B to extract (or `NULL`); modified on output
. colb - on input, the index set of columns of B to extract (or `NULL`); modified on output
5765 - B_seq - the sequential matrix generated
5766 
5767   Level: developer
5768 
5769 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5770 @*/
5771 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5772 {
5773   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5774   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5775   IS          isrowb, iscolb;
5776   Mat        *bseq = NULL;
5777 
5778   PetscFunctionBegin;
5779   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5780              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5781   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5782 
5783   if (scall == MAT_INITIAL_MATRIX) {
5784     start = A->cmap->rstart;
5785     cmap  = a->garray;
5786     nzA   = a->A->cmap->n;
5787     nzB   = a->B->cmap->n;
5788     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5789     ncols = 0;
5790     for (i = 0; i < nzB; i++) { /* row < local row index */
5791       if (cmap[i] < start) idx[ncols++] = cmap[i];
5792       else break;
5793     }
5794     imark = i;
5795     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5796     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5797     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5798     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5799   } else {
5800     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5801     isrowb = *rowb;
5802     iscolb = *colb;
5803     PetscCall(PetscMalloc1(1, &bseq));
5804     bseq[0] = *B_seq;
5805   }
5806   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5807   *B_seq = bseq[0];
5808   PetscCall(PetscFree(bseq));
5809   if (!rowb) {
5810     PetscCall(ISDestroy(&isrowb));
5811   } else {
5812     *rowb = isrowb;
5813   }
5814   if (!colb) {
5815     PetscCall(ISDestroy(&iscolb));
5816   } else {
5817     *colb = iscolb;
5818   }
5819   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5820   PetscFunctionReturn(PETSC_SUCCESS);
5821 }
5822 
5823 /*
5824     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5825     of the OFF-DIAGONAL portion of local A
5826 
5827     Collective
5828 
5829    Input Parameters:
5830 +    A,B - the matrices in `MATMPIAIJ` format
5831 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5832 
   Output Parameters:
5834 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5835 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5836 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5837 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5838 
5839     Developer Note:
5840     This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5842 
5843     Level: developer
5844 
5845 */
5846 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5847 {
5848   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5849   Mat_SeqAIJ        *b_oth;
5850   VecScatter         ctx;
5851   MPI_Comm           comm;
5852   const PetscMPIInt *rprocs, *sprocs;
5853   const PetscInt    *srow, *rstarts, *sstarts;
5854   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5855   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5856   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5857   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5858   PetscMPIInt        size, tag, rank, nreqs;
5859 
5860   PetscFunctionBegin;
5861   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5862   PetscCallMPI(MPI_Comm_size(comm, &size));
5863 
5864   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5865              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5866   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5867   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5868 
5869   if (size == 1) {
5870     startsj_s = NULL;
5871     bufa_ptr  = NULL;
5872     *B_oth    = NULL;
5873     PetscFunctionReturn(PETSC_SUCCESS);
5874   }
5875 
5876   ctx = a->Mvctx;
5877   tag = ((PetscObject)ctx)->tag;
5878 
5879   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5880   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5881   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5882   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5883   PetscCall(PetscMalloc1(nreqs, &reqs));
5884   rwaits = reqs;
5885   swaits = reqs + nrecvs;
5886 
5887   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5888   if (scall == MAT_INITIAL_MATRIX) {
5889     /* i-array */
5890     /*  post receives */
5891     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5892     for (i = 0; i < nrecvs; i++) {
5893       rowlen = rvalues + rstarts[i] * rbs;
5894       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5895       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5896     }
5897 
5898     /* pack the outgoing message */
5899     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5900 
5901     sstartsj[0] = 0;
5902     rstartsj[0] = 0;
5903     len         = 0; /* total length of j or a array to be sent */
5904     if (nsends) {
5905       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5906       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5907     }
5908     for (i = 0; i < nsends; i++) {
5909       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5910       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5911       for (j = 0; j < nrows; j++) {
5912         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5913         for (l = 0; l < sbs; l++) {
5914           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5915 
5916           rowlen[j * sbs + l] = ncols;
5917 
5918           len += ncols;
5919           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5920         }
5921         k++;
5922       }
5923       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5924 
5925       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5926     }
5927     /* recvs and sends of i-array are completed */
5928     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5929     PetscCall(PetscFree(svalues));
5930 
5931     /* allocate buffers for sending j and a arrays */
5932     PetscCall(PetscMalloc1(len + 1, &bufj));
5933     PetscCall(PetscMalloc1(len + 1, &bufa));
5934 
5935     /* create i-array of B_oth */
5936     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5937 
5938     b_othi[0] = 0;
5939     len       = 0; /* total length of j or a array to be received */
5940     k         = 0;
5941     for (i = 0; i < nrecvs; i++) {
5942       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5943       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5944       for (j = 0; j < nrows; j++) {
5945         b_othi[k + 1] = b_othi[k] + rowlen[j];
5946         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5947         k++;
5948       }
5949       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5950     }
5951     PetscCall(PetscFree(rvalues));
5952 
5953     /* allocate space for j and a arrays of B_oth */
5954     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5955     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5956 
5957     /* j-array */
5958     /*  post receives of j-array */
5959     for (i = 0; i < nrecvs; i++) {
5960       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5961       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5962     }
5963 
5964     /* pack the outgoing message j-array */
5965     if (nsends) k = sstarts[0];
5966     for (i = 0; i < nsends; i++) {
5967       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5968       bufJ  = bufj + sstartsj[i];
5969       for (j = 0; j < nrows; j++) {
5970         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5971         for (ll = 0; ll < sbs; ll++) {
5972           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5973           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5974           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5975         }
5976       }
5977       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5978     }
5979 
5980     /* recvs and sends of j-array are completed */
5981     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5982   } else if (scall == MAT_REUSE_MATRIX) {
5983     sstartsj = *startsj_s;
5984     rstartsj = *startsj_r;
5985     bufa     = *bufa_ptr;
5986     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5987     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5988   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5989 
5990   /* a-array */
5991   /*  post receives of a-array */
5992   for (i = 0; i < nrecvs; i++) {
5993     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5994     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5995   }
5996 
5997   /* pack the outgoing message a-array */
5998   if (nsends) k = sstarts[0];
5999   for (i = 0; i < nsends; i++) {
6000     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
6001     bufA  = bufa + sstartsj[i];
6002     for (j = 0; j < nrows; j++) {
6003       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6004       for (ll = 0; ll < sbs; ll++) {
6005         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6006         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6007         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6008       }
6009     }
6010     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6011   }
6012   /* recvs and sends of a-array are completed */
6013   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6014   PetscCall(PetscFree(reqs));
6015 
6016   if (scall == MAT_INITIAL_MATRIX) {
6017     /* put together the new matrix */
6018     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6019 
6020     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6021     /* Since these are PETSc arrays, change flags to free them as necessary. */
6022     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6023     b_oth->free_a  = PETSC_TRUE;
6024     b_oth->free_ij = PETSC_TRUE;
6025     b_oth->nonew   = 0;
6026 
6027     PetscCall(PetscFree(bufj));
6028     if (!startsj_s || !bufa_ptr) {
6029       PetscCall(PetscFree2(sstartsj, rstartsj));
6030       PetscCall(PetscFree(bufa_ptr));
6031     } else {
6032       *startsj_s = sstartsj;
6033       *startsj_r = rstartsj;
6034       *bufa_ptr  = bufa;
6035     }
6036   } else if (scall == MAT_REUSE_MATRIX) {
6037     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6038   }
6039 
6040   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6041   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6042   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6043   PetscFunctionReturn(PETSC_SUCCESS);
6044 }
6045 
6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6049 #if defined(PETSC_HAVE_MKL_SPARSE)
6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6051 #endif
6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6054 #if defined(PETSC_HAVE_ELEMENTAL)
6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6056 #endif
6057 #if defined(PETSC_HAVE_SCALAPACK)
6058 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6059 #endif
6060 #if defined(PETSC_HAVE_HYPRE)
6061 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6062 #endif
6063 #if defined(PETSC_HAVE_CUDA)
6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6065 #endif
6066 #if defined(PETSC_HAVE_HIP)
6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6068 #endif
6069 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6070 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6071 #endif
6072 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6073 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6074 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6075 
6076 /*
6077     Computes (B'*A')' since computing B*A directly is untenable
6078 
6079                n                       p                          p
6080         [             ]       [             ]         [                 ]
6081       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6082         [             ]       [             ]         [                 ]
6083 
6084 */
6085 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6086 {
6087   Mat At, Bt, Ct;
6088 
6089   PetscFunctionBegin;
6090   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6091   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6092   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6093   PetscCall(MatDestroy(&At));
6094   PetscCall(MatDestroy(&Bt));
6095   PetscCall(MatTransposeSetPrecursor(Ct, C));
6096   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6097   PetscCall(MatDestroy(&Ct));
6098   PetscFunctionReturn(PETSC_SUCCESS);
6099 }
6100 
6101 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6102 {
6103   PetscBool cisdense;
6104 
6105   PetscFunctionBegin;
6106   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6107   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6108   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6109   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6110   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6111   PetscCall(MatSetUp(C));
6112 
6113   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6114   PetscFunctionReturn(PETSC_SUCCESS);
6115 }
6116 
6117 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6118 {
6119   Mat_Product *product = C->product;
6120   Mat          A = product->A, B = product->B;
6121 
6122   PetscFunctionBegin;
6123   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6124              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6125   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6126   C->ops->productsymbolic = MatProductSymbolic_AB;
6127   PetscFunctionReturn(PETSC_SUCCESS);
6128 }
6129 
6130 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6131 {
6132   Mat_Product *product = C->product;
6133 
6134   PetscFunctionBegin;
6135   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6136   PetscFunctionReturn(PETSC_SUCCESS);
6137 }
6138 
6139 /*
6140    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6141 
6142   Input Parameters:
6143 
6144     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6145     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6146 
6147     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6148 
6149     For Set1, j1[] contains column indices of the nonzeros.
6150     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6152     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6153 
6154     Similar for Set2.
6155 
6156     This routine merges the two sets of nonzeros row by row and removes repeats.
6157 
6158   Output Parameters: (memory is allocated by the caller)
6159 
6160     i[],j[]: the CSR of the merged matrix, which has m rows.
6161     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6162     imap2[]: similar to imap1[], but for Set2.
6163     Note we order nonzeros row-by-row and from left to right.
6164 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index and number of local rows of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Two-way merge of the sorted (possibly repeated) column indices of row r */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Skip the repeats of this unique nonzero in Set1 */
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Skip the repeats of this unique nonzero in Set2 */
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: t unique nonzeros seen so far */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6222 
6223 /*
6224   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6225 
6226   Input Parameters:
6227     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6228     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6229       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6230 
6231       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6232       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6233 
6234   Output Parameters:
6235     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6236     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6237       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6238       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6239 
6240     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6241       Atot: number of entries belonging to the diagonal block.
6242       Annz: number of unique nonzeros belonging to the diagonal block.
6243       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6244         repeats (i.e., same 'i,j' pair).
6245       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6246         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6247 
6248       Atot: number of entries belonging to the diagonal block
6249       Annz: number of unique nonzeros belonging to the diagonal block.
6250 
6251     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6252 
6253     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6254 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows (entries to be ignored, sorted to the front by the caller) */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sort the row's column indices, carrying perm[] along so value ordering can be recovered */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the shifted diag index to its true column value */
        p++;
      } while (p < mid && j[p] == col); /* j[p] still holds the shifted value here, so compare against the shifted col */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* Advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Reuse the four counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* (p - q) repeats of this unique diag nonzero */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); /* (p - q) repeats of this unique offdiag nonzero */
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6363 
6364 /*
6365   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6366 
6367   Input Parameters:
6368     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6369     nnz:  number of unique nonzeros in the merged matrix
6370     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6371     jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set
6372 
6373   Output Parameter: (memory is allocated by the caller)
6374     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6375 
6376   Example:
6377     nnz1 = 4
6378     nnz  = 6
6379     imap = [1,3,4,5]
6380     jmap = [0,3,5,6,7]
6381    then,
6382     jmap_new = [0,0,3,3,5,6,7]
6383 */
6384 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6385 {
6386   PetscCount k, p;
6387 
6388   PetscFunctionBegin;
6389   jmap_new[0] = 0;
6390   p           = nnz;                /* p loops over jmap_new[] backwards */
6391   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6392     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6393   }
6394   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6395   PetscFunctionReturn(PETSC_SUCCESS);
6396 }
6397 
6398 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6399 {
6400   MPI_Comm    comm;
6401   PetscMPIInt rank, size;
6402   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6403   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6404   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6405 
6406   PetscFunctionBegin;
6407   PetscCall(PetscFree(mpiaij->garray));
6408   PetscCall(VecDestroy(&mpiaij->lvec));
6409 #if defined(PETSC_USE_CTABLE)
6410   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6411 #else
6412   PetscCall(PetscFree(mpiaij->colmap));
6413 #endif
6414   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6415   mat->assembled     = PETSC_FALSE;
6416   mat->was_assembled = PETSC_FALSE;
6417   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6418 
6419   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6420   PetscCallMPI(MPI_Comm_size(comm, &size));
6421   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6422   PetscCall(PetscLayoutSetUp(mat->rmap));
6423   PetscCall(PetscLayoutSetUp(mat->cmap));
6424   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6425   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6426   PetscCall(MatGetLocalSize(mat, &m, &n));
6427   PetscCall(MatGetSize(mat, &M, &N));
6428 
6429   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6430   /* entries come first, then local rows, then remote rows.                     */
6431   PetscCount n1 = coo_n, *perm1;
6432   PetscInt  *i1 = coo_i, *j1 = coo_j;
6433 
6434   PetscCall(PetscMalloc1(n1, &perm1));
6435   for (k = 0; k < n1; k++) perm1[k] = k;
6436 
6437   /* Manipulate indices so that entries with negative row or col indices will have smallest
6438      row indices, local entries will have greater but negative row indices, and remote entries
6439      will have positive row indices.
6440   */
6441   for (k = 0; k < n1; k++) {
6442     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6443     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6444     else {
6445       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6446       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6447     }
6448   }
6449 
6450   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6451   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6452   for (k = 0; k < n1; k++) {
6453     if (i1[k] > PETSC_MIN_INT) break;
6454   }                                                                               /* Advance k to the first entry we need to take care of */
6455   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6456   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6457 
6458   /*           Split local rows into diag/offdiag portions                      */
6459   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6460   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6461   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6462 
6463   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6464   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6465   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6466 
6467   /*           Send remote rows to their owner                                  */
6468   /* Find which rows should be sent to which remote ranks*/
6469   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6470   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6471   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6472   const PetscInt *ranges;
6473   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6474 
6475   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6476   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6477   for (k = rem; k < n1;) {
6478     PetscMPIInt owner;
6479     PetscInt    firstRow, lastRow;
6480 
6481     /* Locate a row range */
6482     firstRow = i1[k]; /* first row of this owner */
6483     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6484     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6485 
6486     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6487     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6488 
6489     /* All entries in [k,p) belong to this remote owner */
6490     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6491       PetscMPIInt *sendto2;
6492       PetscInt    *nentries2;
6493       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6494 
6495       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6496       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6497       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6498       PetscCall(PetscFree2(sendto, nentries2));
6499       sendto   = sendto2;
6500       nentries = nentries2;
6501       maxNsend = maxNsend2;
6502     }
6503     sendto[nsend]   = owner;
6504     nentries[nsend] = p - k;
6505     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6506     nsend++;
6507     k = p;
6508   }
6509 
6510   /* Build 1st SF to know offsets on remote to send data */
6511   PetscSF      sf1;
6512   PetscInt     nroots = 1, nroots2 = 0;
6513   PetscInt     nleaves = nsend, nleaves2 = 0;
6514   PetscInt    *offsets;
6515   PetscSFNode *iremote;
6516 
6517   PetscCall(PetscSFCreate(comm, &sf1));
6518   PetscCall(PetscMalloc1(nsend, &iremote));
6519   PetscCall(PetscMalloc1(nsend, &offsets));
6520   for (k = 0; k < nsend; k++) {
6521     iremote[k].rank  = sendto[k];
6522     iremote[k].index = 0;
6523     nleaves2 += nentries[k];
6524     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6525   }
6526   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6527   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6528   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6529   PetscCall(PetscSFDestroy(&sf1));
6530   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6531 
6532   /* Build 2nd SF to send remote COOs to their owner */
6533   PetscSF sf2;
6534   nroots  = nroots2;
6535   nleaves = nleaves2;
6536   PetscCall(PetscSFCreate(comm, &sf2));
6537   PetscCall(PetscSFSetFromOptions(sf2));
6538   PetscCall(PetscMalloc1(nleaves, &iremote));
6539   p = 0;
6540   for (k = 0; k < nsend; k++) {
6541     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6542     for (q = 0; q < nentries[k]; q++, p++) {
6543       iremote[p].rank  = sendto[k];
6544       iremote[p].index = offsets[k] + q;
6545     }
6546   }
6547   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6548 
6549   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6550   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6551 
6552   /* Send the remote COOs to their owner */
6553   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6554   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6555   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6556   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6557   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6558   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6559   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6560 
6561   PetscCall(PetscFree(offsets));
6562   PetscCall(PetscFree2(sendto, nentries));
6563 
6564   /* Sort received COOs by row along with the permutation array     */
6565   for (k = 0; k < n2; k++) perm2[k] = k;
6566   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6567 
6568   /* Split received COOs into diag/offdiag portions                 */
6569   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6570   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6571   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6572 
6573   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6574   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6575 
6576   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6577   PetscInt *Ai, *Bi;
6578   PetscInt *Aj, *Bj;
6579 
6580   PetscCall(PetscMalloc1(m + 1, &Ai));
6581   PetscCall(PetscMalloc1(m + 1, &Bi));
6582   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6583   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6584 
6585   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6586   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6587   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6588   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6589   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6590 
6591   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6592   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6593 
6594   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6595   /* expect nonzeros in A/B most likely have local contributing entries        */
6596   PetscInt    Annz = Ai[m];
6597   PetscInt    Bnnz = Bi[m];
6598   PetscCount *Ajmap1_new, *Bjmap1_new;
6599 
6600   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6601   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6602 
6603   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6604   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6605 
6606   PetscCall(PetscFree(Aimap1));
6607   PetscCall(PetscFree(Ajmap1));
6608   PetscCall(PetscFree(Bimap1));
6609   PetscCall(PetscFree(Bjmap1));
6610   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6611   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6612   PetscCall(PetscFree(perm1));
6613   PetscCall(PetscFree3(i2, j2, perm2));
6614 
6615   Ajmap1 = Ajmap1_new;
6616   Bjmap1 = Bjmap1_new;
6617 
6618   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6619   if (Annz < Annz1 + Annz2) {
6620     PetscInt *Aj_new;
6621     PetscCall(PetscMalloc1(Annz, &Aj_new));
6622     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6623     PetscCall(PetscFree(Aj));
6624     Aj = Aj_new;
6625   }
6626 
6627   if (Bnnz < Bnnz1 + Bnnz2) {
6628     PetscInt *Bj_new;
6629     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6630     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6631     PetscCall(PetscFree(Bj));
6632     Bj = Bj_new;
6633   }
6634 
6635   /* Create new submatrices for on-process and off-process coupling                  */
6636   PetscScalar *Aa, *Ba;
6637   MatType      rtype;
6638   Mat_SeqAIJ  *a, *b;
6639   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6640   PetscCall(PetscCalloc1(Bnnz, &Ba));
6641   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6642   if (cstart) {
6643     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6644   }
6645   PetscCall(MatDestroy(&mpiaij->A));
6646   PetscCall(MatDestroy(&mpiaij->B));
6647   PetscCall(MatGetRootType_Private(mat, &rtype));
6648   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6649   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6650   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6651 
6652   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6653   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6654   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6655   a->free_a = b->free_a = PETSC_TRUE;
6656   a->free_ij = b->free_ij = PETSC_TRUE;
6657 
6658   /* conversion must happen AFTER multiply setup */
6659   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6660   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6661   PetscCall(VecDestroy(&mpiaij->lvec));
6662   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6663 
6664   mpiaij->coo_n   = coo_n;
6665   mpiaij->coo_sf  = sf2;
6666   mpiaij->sendlen = nleaves;
6667   mpiaij->recvlen = nroots;
6668 
6669   mpiaij->Annz = Annz;
6670   mpiaij->Bnnz = Bnnz;
6671 
6672   mpiaij->Annz2 = Annz2;
6673   mpiaij->Bnnz2 = Bnnz2;
6674 
6675   mpiaij->Atot1 = Atot1;
6676   mpiaij->Atot2 = Atot2;
6677   mpiaij->Btot1 = Btot1;
6678   mpiaij->Btot2 = Btot2;
6679 
6680   mpiaij->Ajmap1 = Ajmap1;
6681   mpiaij->Aperm1 = Aperm1;
6682 
6683   mpiaij->Bjmap1 = Bjmap1;
6684   mpiaij->Bperm1 = Bperm1;
6685 
6686   mpiaij->Aimap2 = Aimap2;
6687   mpiaij->Ajmap2 = Ajmap2;
6688   mpiaij->Aperm2 = Aperm2;
6689 
6690   mpiaij->Bimap2 = Bimap2;
6691   mpiaij->Bjmap2 = Bjmap2;
6692   mpiaij->Bperm2 = Bperm2;
6693 
6694   mpiaij->Cperm1 = Cperm1;
6695 
6696   /* Allocate in preallocation. If not used, it has zero cost on host */
6697   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6698   PetscFunctionReturn(PETSC_SUCCESS);
6699 }
6700 
6701 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6702 {
6703   Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
6704   Mat               A = mpiaij->A, B = mpiaij->B;
6705   PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
6706   PetscScalar      *Aa, *Ba;
6707   PetscScalar      *sendbuf = mpiaij->sendbuf;
6708   PetscScalar      *recvbuf = mpiaij->recvbuf;
6709   const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
6710   const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
6711   const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
6712   const PetscCount *Cperm1 = mpiaij->Cperm1;
6713 
6714   PetscFunctionBegin;
6715   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6716   PetscCall(MatSeqAIJGetArray(B, &Ba));
6717 
6718   /* Pack entries to be sent to remote */
6719   for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6720 
6721   /* Send remote entries to their owner and overlap the communication with local computation */
6722   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6723   /* Add local entries to A and B */
6724   for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6725     PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
6726     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6727     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6728   }
6729   for (PetscCount i = 0; i < Bnnz; i++) {
6730     PetscScalar sum = 0.0;
6731     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6732     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6733   }
6734   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6735 
6736   /* Add received remote entries to A and B */
6737   for (PetscCount i = 0; i < Annz2; i++) {
6738     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6739   }
6740   for (PetscCount i = 0; i < Bnnz2; i++) {
6741     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6742   }
6743   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6744   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6745   PetscFunctionReturn(PETSC_SUCCESS);
6746 }
6747 
6748 /*MC
6749    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6750 
6751    Options Database Keys:
6752 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6753 
6754    Level: beginner
6755 
6756    Notes:
6757    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6758     in this case the values associated with the rows and columns one passes in are set to zero
6759     in the matrix
6760 
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6763 
6764 .seealso: [](chapter_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6765 M*/
/* Type constructor for MATMPIAIJ: installs the function table, creates the stash used for
   off-process MatSetValues(), initializes per-type fields, and composes the type-specific
   methods and conversion routines that are dispatched via PetscObjectQueryFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  /* Copy the whole static operations table for this type */
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map; built lazily when B entries are inserted */
  b->garray      = NULL; /* global indices of the off-diagonal columns; built during assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Compose type-specific methods; the "..._C" strings are the lookup keys used by the
     generic Mat interfaces (and cleared again in MatDestroy_MPIAIJ) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other formats; device/backend variants are compiled in conditionally */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6848 
6849 /*@C
6850      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6851          and "off-diagonal" part of the matrix in CSR format.
6852 
6853    Collective
6854 
6855    Input Parameters:
6856 +  comm - MPI communicator
6857 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6858 .  n - This value should be the same as the local size used in creating the
6859        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6860        calculated if `N` is given) For square matrices `n` is almost always `m`.
6861 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6862 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6863 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6864 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6865 .   a - matrix values
6866 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6867 .   oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6868 -   oa - matrix values
6869 
6870    Output Parameter:
6871 .   mat - the matrix
6872 
6873    Level: advanced
6874 
6875    Notes:
6876        The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6877        must free the arrays once the matrix has been destroyed and not before.
6878 
6879        The `i` and `j` indices are 0 based
6880 
6881        See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6882 
6883        This sets local rows and cannot be used to set off-processor values.
6884 
6885        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6886        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6887        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6888        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6889        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6890        communication if it is known that only local entries will be set.
6891 
6892 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6893           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6894 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* The user-provided CSR arrays are adopted, not copied; validate the invariants we can check cheaply */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* The split CSR data takes the place of a conventional preallocation */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user arrays: A holds the diagonal block (local column indices),
     B the off-diagonal block (global column indices, compacted during assembly) */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Assemble with stashing disabled (this routine only sets local rows), then restore
     the default and lock the nonzero pattern since the arrays are externally owned */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6923 
/* Product data attached to C->product->data for the backend (device-friendly) implementation
   of MPIAIJ matrix-matrix products; freed by MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory type of coo_v/coo_w (host or device) */

  /* customization */
  PetscBool abmerge;    /* merge diag/offdiag of B when computing A*B */
  PetscBool P_oth_bind; /* bind P_oth to CPU */
} MatMatMPIAIJBACKEND;
6954 
6955 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6956 {
6957   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6958   PetscInt             i;
6959 
6960   PetscFunctionBegin;
6961   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6962   PetscCall(PetscFree(mmdata->bufa));
6963   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6964   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6965   PetscCall(MatDestroy(&mmdata->P_oth));
6966   PetscCall(MatDestroy(&mmdata->Bloc));
6967   PetscCall(PetscSFDestroy(&mmdata->sf));
6968   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6969   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6970   PetscCall(PetscFree(mmdata->own[0]));
6971   PetscCall(PetscFree(mmdata->own));
6972   PetscCall(PetscFree(mmdata->off[0]));
6973   PetscCall(PetscFree(mmdata->off));
6974   PetscCall(PetscFree(mmdata));
6975   PetscFunctionReturn(PETSC_SUCCESS);
6976 }
6977 
6978 /* Copy selected n entries with indices in idx[] of A to v[].
6979    If idx is NULL, copy the whole data array of A to v[]
6980  */
6981 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6982 {
6983   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
6984 
6985   PetscFunctionBegin;
6986   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
6987   if (f) {
6988     PetscCall((*f)(A, n, idx, v));
6989   } else {
6990     const PetscScalar *vv;
6991 
6992     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
6993     if (n && idx) {
6994       PetscScalar    *w  = v;
6995       const PetscInt *oi = idx;
6996       PetscInt        j;
6997 
6998       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6999     } else {
7000       PetscCall(PetscArraycpy(v, vv, n));
7001     }
7002     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7003   }
7004   PetscFunctionReturn(PETSC_SUCCESS);
7005 }
7006 
/* Numeric phase for backend MPIAIJ products: refresh the temporaries, run the numeric phase
   of every intermediate product, then scatter their values into C via MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* the symbolic-phase values can only be reused for the first numeric call */
  mmdata->reusesym = PETSC_FALSE;

  /* Run the numeric phase of each intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* Collect values of the non-temporary intermediates into the COO buffers;
     own[i]/off[i] delimit each product's on-/off-process index lists (pointer differences) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* gather the off-process values to the tail of coo_v, past the n_d on-process values */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7051 
/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase of the backend matrix product.

   Strategy: decompose the parallel product into at most MAX_NUMBER_INTERMEDIATE
   sequential products mp[] on the local diagonal/off-diagonal blocks (plus
   gathered off-process rows P_oth), run their symbolic phases, and record for
   each mp[] how its local row/column indices map to global indices of C
   (rmapt/cmapt types with rmapa/cmapa tables).  From those maps the routine
   builds COO (i,j) coordinates for every nonzero — including ones owned by
   other processes, which are routed through a PetscSF — and preallocates C
   with MatSetPreallocationCOO().  The numeric phase then only has to copy
   values into the same layout. */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* symmetric A lets us compute A*B instead of At*B, avoiding a transpose */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine local/global sizes of C and whether any process may produce
     entries owned by another process (hasoffproc) */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: everything is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* create the sequence of intermediate sequential products mp[0..cp-1],
     recording for each one its row/col local-to-global map type/table */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth is only an operand of the next product (mp[1]) */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  /* persist the intermediate products in the product data for the numeric phase */
  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      /* close the CSR-like segments for matrix cp (even when nothing was added) */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* route the off-process (i,j) pairs to their owners and learn how many we receive (ncoo2) */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF so the numeric phase can use the same code path */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7548 
/* Decide whether the backend (device-friendly, COO-based) product implementation
   should be used for this product, or whether to fall back to the plain MPIAIJ
   implementation.

   Without device support the backend is always eligible (match = PETSC_TRUE).
   With device support, both operands must be unbound from the CPU and of the
   same type, and the user can force the CPU path per product type via the
   *_backend_cpu options. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* operands must have matching types and live on the device for the backend path */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  /* install the backend symbolic op only for the supported product types */
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7619 
7620 /*
7621    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7622 
7623    n - the number of block indices in cc[]
7624    cc - the block indices (must be large enough to contain the indices)
7625 */
7626 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7627 {
7628   PetscInt        cnt = -1, nidx, j;
7629   const PetscInt *idx;
7630 
7631   PetscFunctionBegin;
7632   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7633   if (nidx) {
7634     cnt     = 0;
7635     cc[cnt] = idx[0] / bs;
7636     for (j = 1; j < nidx; j++) {
7637       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7638     }
7639   }
7640   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7641   *n = cnt + 1;
7642   PetscFunctionReturn(PETSC_SUCCESS);
7643 }
7644 
7645 /*
7646     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7647 
7648     ncollapsed - the number of block indices
7649     collapsed - the block indices (must be large enough to contain the indices)
7650 */
7651 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7652 {
7653   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7654 
7655   PetscFunctionBegin;
7656   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7657   for (i = start + 1; i < start + bs; i++) {
7658     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7659     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7660     cprevtmp = cprev;
7661     cprev    = merged;
7662     merged   = cprevtmp;
7663   }
7664   *ncollapsed = nprev;
7665   if (collapsed) *collapsed = cprev;
7666   PetscFunctionReturn(PETSC_SUCCESS);
7667 }
7668 
7669 /*
7670    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7671 */
7672 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
7673 {
7674   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7675   Mat                tGmat;
7676   MPI_Comm           comm;
7677   const PetscScalar *vals;
7678   const PetscInt    *idx;
7679   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7680   MatScalar         *AA; // this is checked in graph
7681   PetscBool          isseqaij;
7682   Mat                a, b, c;
7683   MatType            jtype;
7684 
7685   PetscFunctionBegin;
7686   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
7687   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
7688   PetscCall(MatGetType(Gmat, &jtype));
7689   PetscCall(MatCreate(comm, &tGmat));
7690   PetscCall(MatSetType(tGmat, jtype));
7691 
7692   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7693                Also, if the matrix is symmetric, can we skip this
7694                operation? It can be very expensive on large matrices. */
7695 
7696   // global sizes
7697   PetscCall(MatGetSize(Gmat, &MM, &NN));
7698   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7699   nloc = Iend - Istart;
7700   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
7701   if (isseqaij) {
7702     a = Gmat;
7703     b = NULL;
7704   } else {
7705     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7706     a             = d->A;
7707     b             = d->B;
7708     garray        = d->garray;
7709   }
7710   /* Determine upper bound on non-zeros needed in new filtered matrix */
7711   for (PetscInt row = 0; row < nloc; row++) {
7712     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
7713     d_nnz[row] = ncols;
7714     if (ncols > maxcols) maxcols = ncols;
7715     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
7716   }
7717   if (b) {
7718     for (PetscInt row = 0; row < nloc; row++) {
7719       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
7720       o_nnz[row] = ncols;
7721       if (ncols > maxcols) maxcols = ncols;
7722       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
7723     }
7724   }
7725   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
7726   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7727   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
7728   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
7729   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7730   PetscCall(PetscFree2(d_nnz, o_nnz));
7731   //
7732   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
7733   nnz0 = nnz1 = 0;
7734   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7735     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7736       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
7737       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7738         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7739         if (PetscRealPart(sv) > vfilter) {
7740           nnz1++;
7741           PetscInt cid = idx[jj] + Istart; //diag
7742           if (c != a) cid = garray[idx[jj]];
7743           AA[ncol_row] = vals[jj];
7744           AJ[ncol_row] = cid;
7745           ncol_row++;
7746         }
7747       }
7748       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
7749       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
7750     }
7751   }
7752   PetscCall(PetscFree2(AA, AJ));
7753   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
7754   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7755   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7756 
7757   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7758 
7759   *filteredG = tGmat;
7760   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7761   PetscFunctionReturn(PETSC_SUCCESS);
7762 }
7763 
7764 /*
7765  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7766 
7767  Input Parameter:
7768  . Amat - matrix
7769  - symmetrize - make the result symmetric
7770  + scale - scale with diagonal
7771 
7772  Output Parameter:
7773  . a_Gmat - output scalar graph >= 0
7774 
7775 */
7776 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7777 {
7778   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7779   MPI_Comm  comm;
7780   Mat       Gmat;
7781   PetscBool ismpiaij, isseqaij;
7782   Mat       a, b, c;
7783   MatType   jtype;
7784 
7785   PetscFunctionBegin;
7786   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7787   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7788   PetscCall(MatGetSize(Amat, &MM, &NN));
7789   PetscCall(MatGetBlockSize(Amat, &bs));
7790   nloc = (Iend - Istart) / bs;
7791 
7792   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7793   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7794   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7795 
7796   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7797   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7798      implementation */
7799   if (bs > 1) {
7800     PetscCall(MatGetType(Amat, &jtype));
7801     PetscCall(MatCreate(comm, &Gmat));
7802     PetscCall(MatSetType(Gmat, jtype));
7803     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7804     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7805     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7806       PetscInt  *d_nnz, *o_nnz;
7807       MatScalar *aa, val, *AA;
7808       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7809       if (isseqaij) {
7810         a = Amat;
7811         b = NULL;
7812       } else {
7813         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7814         a             = d->A;
7815         b             = d->B;
7816       }
7817       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7818       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7819       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7820         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7821         const PetscInt *cols1, *cols2;
7822         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7823           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7824           nnz[brow / bs] = nc2 / bs;
7825           if (nc2 % bs) ok = 0;
7826           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7827           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7828             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7829             if (nc1 != nc2) ok = 0;
7830             else {
7831               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7832                 if (cols1[jj] != cols2[jj]) ok = 0;
7833                 if (cols1[jj] % bs != jj % bs) ok = 0;
7834               }
7835             }
7836             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7837           }
7838           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7839           if (!ok) {
7840             PetscCall(PetscFree2(d_nnz, o_nnz));
7841             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7842             goto old_bs;
7843           }
7844         }
7845       }
7846       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7847       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7848       PetscCall(PetscFree2(d_nnz, o_nnz));
7849       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7850       // diag
7851       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7852         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7853         ai               = aseq->i;
7854         n                = ai[brow + 1] - ai[brow];
7855         aj               = aseq->j + ai[brow];
7856         for (int k = 0; k < n; k += bs) {        // block columns
7857           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7858           val        = 0;
7859           for (int ii = 0; ii < bs; ii++) { // rows in block
7860             aa = aseq->a + ai[brow + ii] + k;
7861             for (int jj = 0; jj < bs; jj++) {         // columns in block
7862               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7863             }
7864           }
7865           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7866           AA[k / bs] = val;
7867         }
7868         grow = Istart / bs + brow / bs;
7869         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7870       }
7871       // off-diag
7872       if (ismpiaij) {
7873         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7874         const PetscScalar *vals;
7875         const PetscInt    *cols, *garray = aij->garray;
7876         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7877         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7878           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7879           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7880             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7881             AA[k / bs] = 0;
7882             AJ[cidx]   = garray[cols[k]] / bs;
7883           }
7884           nc = ncols / bs;
7885           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7886           for (int ii = 0; ii < bs; ii++) { // rows in block
7887             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7888             for (int k = 0; k < ncols; k += bs) {
7889               for (int jj = 0; jj < bs; jj++) { // cols in block
7890                 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7891                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7892               }
7893             }
7894             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7895           }
7896           grow = Istart / bs + brow / bs;
7897           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7898         }
7899       }
7900       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7901       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7902       PetscCall(PetscFree2(AA, AJ));
7903     } else {
7904       const PetscScalar *vals;
7905       const PetscInt    *idx;
7906       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7907     old_bs:
7908       /*
7909        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7910        */
7911       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7912       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7913       if (isseqaij) {
7914         PetscInt max_d_nnz;
7915         /*
7916          Determine exact preallocation count for (sequential) scalar matrix
7917          */
7918         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7919         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7920         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7921         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7922         PetscCall(PetscFree3(w0, w1, w2));
7923       } else if (ismpiaij) {
7924         Mat             Daij, Oaij;
7925         const PetscInt *garray;
7926         PetscInt        max_d_nnz;
7927         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7928         /*
7929          Determine exact preallocation count for diagonal block portion of scalar matrix
7930          */
7931         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7932         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7933         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7934         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7935         PetscCall(PetscFree3(w0, w1, w2));
7936         /*
7937          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7938          */
7939         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7940           o_nnz[jj] = 0;
7941           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7942             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7943             o_nnz[jj] += ncols;
7944             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7945           }
7946           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7947         }
7948       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7949       /* get scalar copy (norms) of matrix */
7950       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7951       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7952       PetscCall(PetscFree2(d_nnz, o_nnz));
7953       for (Ii = Istart; Ii < Iend; Ii++) {
7954         PetscInt dest_row = Ii / bs;
7955         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7956         for (jj = 0; jj < ncols; jj++) {
7957           PetscInt    dest_col = idx[jj] / bs;
7958           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7959           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7960         }
7961         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7962       }
7963       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7964       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7965     }
7966   } else {
7967     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7968     else {
7969       Gmat = Amat;
7970       PetscCall(PetscObjectReference((PetscObject)Gmat));
7971     }
7972     if (isseqaij) {
7973       a = Gmat;
7974       b = NULL;
7975     } else {
7976       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7977       a             = d->A;
7978       b             = d->B;
7979     }
7980     if (filter >= 0 || scale) {
7981       /* take absolute value of each entry */
7982       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7983         MatInfo      info;
7984         PetscScalar *avals;
7985         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7986         PetscCall(MatSeqAIJGetArray(c, &avals));
7987         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7988         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7989       }
7990     }
7991   }
7992   if (symmetrize) {
7993     PetscBool isset, issym;
7994     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7995     if (!isset || !issym) {
7996       Mat matTrans;
7997       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7998       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7999       PetscCall(MatDestroy(&matTrans));
8000     }
8001     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8002   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8003   if (scale) {
8004     /* scale c for all diagonal values = 1 or -1 */
8005     Vec diag;
8006     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8007     PetscCall(MatGetDiagonal(Gmat, diag));
8008     PetscCall(VecReciprocal(diag));
8009     PetscCall(VecSqrtAbs(diag));
8010     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8011     PetscCall(VecDestroy(&diag));
8012   }
8013   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8014 
8015   if (filter >= 0) {
8016     Mat Fmat = NULL; /* some silly compiler needs this */
8017 
8018     PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
8019     PetscCall(MatDestroy(&Gmat));
8020     Gmat = Fmat;
8021   }
8022   *a_Gmat = Gmat;
8023   PetscFunctionReturn(PETSC_SUCCESS);
8024 }
8025 
8026 /*
8027     Special version for direct calls from Fortran
8028 */
8029 #include <petsc/private/fortranimpl.h>
8030 
8031 /* Change these macros so can be used in void function */
8032 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8033 #undef PetscCall
8034 #define PetscCall(...) \
8035   do { \
8036     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8037     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8038       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8039       return; \
8040     } \
8041   } while (0)
8042 
8043 #undef SETERRQ
8044 #define SETERRQ(comm, ierr, ...) \
8045   do { \
8046     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8047     return; \
8048   } while (0)
8049 
8050 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8051   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8052 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8053   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8054 #else
8055 #endif
/*
   matsetvaluesmpiaij_ - Fortran-callable MatSetValues() for MPIAIJ matrices; all arguments
   are passed by reference (Fortran convention) and the error code is returned in *_ierr.

   Inserts/adds the m x n logically dense block v into rows im[] and columns in[] (global,
   0-based). Locally owned entries go directly into the diagonal (A) or off-diagonal (B)
   sequential blocks via the MatSetValues_SeqAIJ_{A,B}_Private() macros; off-process rows
   are stashed for communication at assembly time.

   NOTE: many local variable names below (rp1, ap1, rmax1, low1, inserted, ...) are not
   dead — they are consumed by the Private() macros and must keep these exact names.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are ignored by MatSetValues convention */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up the binary-search state the Private() macros use,
           for both the diagonal block (…1) and the off-diagonal block (…2) */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate global column to local B column */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-diagonal column: disassemble so B uses global columns again */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for MatAssemblyBegin/End communication */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8169 
8170 /* Undefining these here since they were redefined from their original definition above! No
8171  * other PETSc functions should be defined past this point, as it is impossible to recover the
8172  * original definitions */
8173 #undef PetscCall
8174 #undef SETERRQ
8175