1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14 #if defined(PETSC_USE_LOG)
15   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
16 #endif
17   PetscCall(MatStashDestroy_Private(&mat->stash));
18   PetscCall(VecDestroy(&aij->diag));
19   PetscCall(MatDestroy(&aij->A));
20   PetscCall(MatDestroy(&aij->B));
21 #if defined(PETSC_USE_CTABLE)
22   PetscCall(PetscHMapIDestroy(&aij->colmap));
23 #else
24   PetscCall(PetscFree(aij->colmap));
25 #endif
26   PetscCall(PetscFree(aij->garray));
27   PetscCall(VecDestroy(&aij->lvec));
28   PetscCall(VecScatterDestroy(&aij->Mvctx));
29   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
30   PetscCall(PetscFree(aij->ld));
31 
32   /* Free COO */
33   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
34 
35   PetscCall(PetscFree(mat->data));
36 
37   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
38   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
39 
40   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
50 #if defined(PETSC_HAVE_CUDA)
51   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
52 #endif
53 #if defined(PETSC_HAVE_HIP)
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
55 #endif
56 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
57   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
58 #endif
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
60 #if defined(PETSC_HAVE_ELEMENTAL)
61   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
62 #endif
63 #if defined(PETSC_HAVE_SCALAPACK)
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
65 #endif
66 #if defined(PETSC_HAVE_HYPRE)
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
69 #endif
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
76 #if defined(PETSC_HAVE_MKL_SPARSE)
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
78 #endif
79   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
80   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
82   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
84   PetscFunctionReturn(PETSC_SUCCESS);
85 }
86 
87 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
88 #define TYPE AIJ
89 #define TYPE_AIJ
90 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
91 #undef TYPE
92 #undef TYPE_AIJ
93 
94 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
95 {
96   Mat B;
97 
98   PetscFunctionBegin;
99   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
100   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
101   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
102   PetscCall(MatDestroy(&B));
103   PetscFunctionReturn(PETSC_SUCCESS);
104 }
105 
106 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
107 {
108   Mat B;
109 
110   PetscFunctionBegin;
111   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
112   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
113   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 /*MC
118    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
119 
120    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
121    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
122    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
123    for communicators controlling multiple processes.  It is recommended that you call both of
124    the above preallocation routines for simplicity.
125 
126    Options Database Key:
127 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
128 
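  Example Usage (a minimal sketch; the communicator `comm`, the global sizes, and the preallocation counts are placeholders, and error checking is omitted):
.vb
  Mat A;
  MatCreate(comm, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N);
  MatSetType(A, MATAIJ);
  MatSeqAIJSetPreallocation(A, 5, NULL);          // used when the communicator has a single process
  MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL); // used when the communicator has multiple processes
  // ... set entries with MatSetValues() ...
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve
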
129   Level: beginner
130
131   Developer Note:
132   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also
133   automatically switches over to use inodes when enough exist.
134 
135 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
136 M*/
137 
138 /*MC
139    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
140 
141    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
142    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
143    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
144   for communicators controlling multiple processes.  It is recommended that you call both of
145   the above preallocation routines for simplicity.
146 
147    Options Database Key:
148 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
149 
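  Example Usage (a minimal sketch; `A` is an assumed already-created `Mat`):
.vb
  MatSetType(A, MATAIJCRL);
.ve
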
150   Level: beginner
151 
152 .seealso: [](chapter_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
153 M*/
154 
155 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
156 {
157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
158 
159   PetscFunctionBegin;
160 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
161   A->boundtocpu = flg;
162 #endif
163   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
164   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
165 
166   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
167    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
168    * to differ from that of the parent matrix. */
169   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
170   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
171 
172   PetscFunctionReturn(PETSC_SUCCESS);
173 }
174 
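/*
   Note: the off-diagonal block B stores its columns in a compressed ghost-column numbering that carries no block
   structure, so only the row block size is propagated to it below, while its column block size is kept at 1.
*/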
175 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
176 {
177   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
178 
179   PetscFunctionBegin;
180   if (mat->A) {
181     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
182     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
183   }
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
188 {
189   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
190   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
191   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
192   const PetscInt  *ia, *ib;
193   const MatScalar *aa, *bb, *aav, *bav;
194   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
195   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
196 
197   PetscFunctionBegin;
198   *keptrows = NULL;
199 
200   ia = a->i;
201   ib = b->i;
202   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
203   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
204   for (i = 0; i < m; i++) {
205     na = ia[i + 1] - ia[i];
206     nb = ib[i + 1] - ib[i];
207     if (!na && !nb) {
208       cnt++;
209       goto ok1;
210     }
211     aa = aav + ia[i];
212     for (j = 0; j < na; j++) {
213       if (aa[j] != 0.0) goto ok1;
214     }
215     bb = bav + ib[i];
216     for (j = 0; j < nb; j++) {
217       if (bb[j] != 0.0) goto ok1;
218     }
219     cnt++;
220   ok1:;
221   }
222   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
223   if (!n0rows) {
224     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
225     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
226     PetscFunctionReturn(PETSC_SUCCESS);
227   }
228   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
229   cnt = 0;
230   for (i = 0; i < m; i++) {
231     na = ia[i + 1] - ia[i];
232     nb = ib[i + 1] - ib[i];
233     if (!na && !nb) continue;
234     aa = aav + ia[i];
235     for (j = 0; j < na; j++) {
236       if (aa[j] != 0.0) {
237         rows[cnt++] = rstart + i;
238         goto ok2;
239       }
240     }
241     bb = bav + ib[i];
242     for (j = 0; j < nb; j++) {
243       if (bb[j] != 0.0) {
244         rows[cnt++] = rstart + i;
245         goto ok2;
246       }
247     }
248   ok2:;
249   }
250   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
251   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
252   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
253   PetscFunctionReturn(PETSC_SUCCESS);
254 }
255 
256 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
257 {
258   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
259   PetscBool   cong;
260 
261   PetscFunctionBegin;
262   PetscCall(MatHasCongruentLayouts(Y, &cong));
263   if (Y->assembled && cong) {
264     PetscCall(MatDiagonalSet(aij->A, D, is));
265   } else {
266     PetscCall(MatDiagonalSet_Default(Y, D, is));
267   }
268   PetscFunctionReturn(PETSC_SUCCESS);
269 }
270 
271 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
272 {
273   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
274   PetscInt    i, rstart, nrows, *rows;
275 
276   PetscFunctionBegin;
277   *zrows = NULL;
278   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
279   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
280   for (i = 0; i < nrows; i++) rows[i] += rstart;
281   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
282   PetscFunctionReturn(PETSC_SUCCESS);
283 }
284 
285 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
286 {
287   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
288   PetscInt           i, m, n, *garray = aij->garray;
289   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
290   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
291   PetscReal         *work;
292   const PetscScalar *dummy;
293 
294   PetscFunctionBegin;
295   PetscCall(MatGetSize(A, &m, &n));
296   PetscCall(PetscCalloc1(n, &work));
297   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
298   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
299   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
300   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
301   if (type == NORM_2) {
302     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
303     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
304   } else if (type == NORM_1) {
305     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
306     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
307   } else if (type == NORM_INFINITY) {
308     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
309     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
310   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
311     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
312     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
313   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
314     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
315     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
316   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
317   if (type == NORM_INFINITY) {
318     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
319   } else {
320     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
321   }
322   PetscCall(PetscFree(work));
323   if (type == NORM_2) {
324     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
325   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
326     for (i = 0; i < n; i++) reductions[i] /= m;
327   }
328   PetscFunctionReturn(PETSC_SUCCESS);
329 }
330 
331 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
332 {
333   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
334   IS              sis, gis;
335   const PetscInt *isis, *igis;
336   PetscInt        n, *iis, nsis, ngis, rstart, i;
337 
338   PetscFunctionBegin;
339   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
340   PetscCall(MatFindNonzeroRows(a->B, &gis));
341   PetscCall(ISGetSize(gis, &ngis));
342   PetscCall(ISGetSize(sis, &nsis));
343   PetscCall(ISGetIndices(sis, &isis));
344   PetscCall(ISGetIndices(gis, &igis));
345 
346   PetscCall(PetscMalloc1(ngis + nsis, &iis));
347   PetscCall(PetscArraycpy(iis, igis, ngis));
348   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
349   n = ngis + nsis;
350   PetscCall(PetscSortRemoveDupsInt(&n, iis));
351   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
352   for (i = 0; i < n; i++) iis[i] += rstart;
353   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
354 
355   PetscCall(ISRestoreIndices(sis, &isis));
356   PetscCall(ISRestoreIndices(gis, &igis));
357   PetscCall(ISDestroy(&sis));
358   PetscCall(ISDestroy(&gis));
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364   number to the local number in the off-diagonal part of the local
365   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
366   a slightly higher hash table cost; without it, it is not scalable (each process
367   has an order-N integer array) but is fast to access.
368 */
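/*
   A minimal lookup sketch using the colmap created below (not part of the build; gcol is an assumed global column
   index and lcol receives the corresponding local index in the off-diagonal block, or -1 if gcol is not present):

#if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
     lcol--;
#else
     lcol = aij->colmap[gcol] - 1;
#endif
*/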
369 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
370 {
371   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
372   PetscInt    n   = aij->B->cmap->n, i;
373 
374   PetscFunctionBegin;
375   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
376 #if defined(PETSC_USE_CTABLE)
377   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
378   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
379 #else
380   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
381   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
382 #endif
383   PetscFunctionReturn(PETSC_SUCCESS);
384 }
385 
386 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
387   { \
388     if (col <= lastcol1) low1 = 0; \
389     else high1 = nrow1; \
390     lastcol1 = col; \
391     while (high1 - low1 > 5) { \
392       t = (low1 + high1) / 2; \
393       if (rp1[t] > col) high1 = t; \
394       else low1 = t; \
395     } \
396     for (_i = low1; _i < high1; _i++) { \
397       if (rp1[_i] > col) break; \
398       if (rp1[_i] == col) { \
399         if (addv == ADD_VALUES) { \
400           ap1[_i] += value; \
401           /* Not sure whether PetscLogFlops() will slow down the code or not */ \
402           (void)PetscLogFlops(1.0); \
403         } else ap1[_i] = value; \
404         goto a_noinsert; \
405       } \
406     } \
407     if (value == 0.0 && ignorezeroentries && row != col) { \
408       low1  = 0; \
409       high1 = nrow1; \
410       goto a_noinsert; \
411     } \
412     if (nonew == 1) { \
413       low1  = 0; \
414       high1 = nrow1; \
415       goto a_noinsert; \
416     } \
417     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
418     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
419     N = nrow1++ - 1; \
420     a->nz++; \
421     high1++; \
422     /* shift up all the later entries in this row */ \
423     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
424     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
425     rp1[_i] = col; \
426     ap1[_i] = value; \
427     A->nonzerostate++; \
428   a_noinsert:; \
429     ailen[row] = nrow1; \
430   }
431 
432 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
433   { \
434     if (col <= lastcol2) low2 = 0; \
435     else high2 = nrow2; \
436     lastcol2 = col; \
437     while (high2 - low2 > 5) { \
438       t = (low2 + high2) / 2; \
439       if (rp2[t] > col) high2 = t; \
440       else low2 = t; \
441     } \
442     for (_i = low2; _i < high2; _i++) { \
443       if (rp2[_i] > col) break; \
444       if (rp2[_i] == col) { \
445         if (addv == ADD_VALUES) { \
446           ap2[_i] += value; \
447           (void)PetscLogFlops(1.0); \
448         } else ap2[_i] = value; \
449         goto b_noinsert; \
450       } \
451     } \
452     if (value == 0.0 && ignorezeroentries) { \
453       low2  = 0; \
454       high2 = nrow2; \
455       goto b_noinsert; \
456     } \
457     if (nonew == 1) { \
458       low2  = 0; \
459       high2 = nrow2; \
460       goto b_noinsert; \
461     } \
462     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
463     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
464     N = nrow2++ - 1; \
465     b->nz++; \
466     high2++; \
467     /* shift up all the later entries in this row */ \
468     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
469     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
470     rp2[_i] = col; \
471     ap2[_i] = value; \
472     B->nonzerostate++; \
473   b_noinsert:; \
474     bilen[row] = nrow2; \
475   }
476 
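/*
   Note on the expected input (inferred from the copy logic below): v[] must supply one value for each stored
   nonzero of the locally owned row, ordered by increasing global column index, i.e. the off-diagonal entries
   left of the diagonal block, then the diagonal-block entries, then the off-diagonal entries right of it.
*/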
477 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
478 {
479   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
480   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
481   PetscInt     l, *garray                         = mat->garray, diag;
482   PetscScalar *aa, *ba;
483 
484   PetscFunctionBegin;
485   /* code only works for square matrices A */
486 
487   /* find size of row to the left of the diagonal part */
488   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
489   row = row - diag;
490   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
491     if (garray[b->j[b->i[row] + l]] > diag) break;
492   }
493   if (l) {
494     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
495     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
496     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
497   }
498 
499   /* diagonal part */
500   if (a->i[row + 1] - a->i[row]) {
501     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
502     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
503     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
504   }
505 
506   /* right of diagonal part */
507   if (b->i[row + 1] - b->i[row] - l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512   PetscFunctionReturn(PETSC_SUCCESS);
513 }
514 
515 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
516 {
517   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
518   PetscScalar value = 0.0;
519   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
520   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
521   PetscBool   roworiented = aij->roworiented;
522 
523   /* Some Variables required in the macro */
524   Mat         A     = aij->A;
525   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
526   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
527   PetscBool   ignorezeroentries = a->ignorezeroentries;
528   Mat         B                 = aij->B;
529   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
530   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
531   MatScalar  *aa, *ba;
532   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
533   PetscInt    nonew;
534   MatScalar  *ap1, *ap2;
535 
536   PetscFunctionBegin;
537   PetscCall(MatSeqAIJGetArray(A, &aa));
538   PetscCall(MatSeqAIJGetArray(B, &ba));
539   for (i = 0; i < m; i++) {
540     if (im[i] < 0) continue;
541     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
542     if (im[i] >= rstart && im[i] < rend) {
543       row      = im[i] - rstart;
544       lastcol1 = -1;
545       rp1      = aj + ai[row];
546       ap1      = aa + ai[row];
547       rmax1    = aimax[row];
548       nrow1    = ailen[row];
549       low1     = 0;
550       high1    = nrow1;
551       lastcol2 = -1;
552       rp2      = bj + bi[row];
553       ap2      = ba + bi[row];
554       rmax2    = bimax[row];
555       nrow2    = bilen[row];
556       low2     = 0;
557       high2    = nrow2;
558 
559       for (j = 0; j < n; j++) {
560         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
561         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
562         if (in[j] >= cstart && in[j] < cend) {
563           col   = in[j] - cstart;
564           nonew = a->nonew;
565           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
566         } else if (in[j] < 0) {
567           continue;
568         } else {
569           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
570           if (mat->was_assembled) {
571             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
572 #if defined(PETSC_USE_CTABLE)
573             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
574             col--;
575 #else
576             col = aij->colmap[in[j]] - 1;
577 #endif
578             if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
579               PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
580               col = in[j];
581               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
582               B     = aij->B;
583               b     = (Mat_SeqAIJ *)B->data;
584               bimax = b->imax;
585               bi    = b->i;
586               bilen = b->ilen;
587               bj    = b->j;
588               ba    = b->a;
589               rp2   = bj + bi[row];
590               ap2   = ba + bi[row];
591               rmax2 = bimax[row];
592               nrow2 = bilen[row];
593               low2  = 0;
594               high2 = nrow2;
595               bm    = aij->B->rmap->n;
596               ba    = b->a;
597             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
598               if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
599                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
600               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
601             }
602           } else col = in[j];
603           nonew = b->nonew;
604           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
605         }
606       }
607     } else {
608       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
609       if (!aij->donotstash) {
610         mat->assembled = PETSC_FALSE;
611         if (roworiented) {
612           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
613         } else {
614           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
615         }
616       }
617     }
618   }
619   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
620   PetscCall(MatSeqAIJRestoreArray(B, &ba));
621   PetscFunctionReturn(PETSC_SUCCESS);
622 }
623 
624 /*
625     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
626     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
627     No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
628 */
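/*
   A tiny sketch of the expected input (made-up numbers): with cstart = 2, cend = 4 and the local CSR row
     mat_i = {0, 3},  mat_j = {0, 2, 5}
   column 2 lands in the diagonal block as local column 2 - cstart = 0, while columns 0 and 5 land in the
   off-diagonal block and keep their global indices.
*/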
629 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
630 {
631   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
632   Mat         A      = aij->A; /* diagonal part of the matrix */
633   Mat         B      = aij->B; /* offdiagonal part of the matrix */
634   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
635   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
636   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
637   PetscInt   *ailen = a->ilen, *aj = a->j;
638   PetscInt   *bilen = b->ilen, *bj = b->j;
639   PetscInt    am          = aij->A->rmap->n, j;
640   PetscInt    diag_so_far = 0, dnz;
641   PetscInt    offd_so_far = 0, onz;
642 
643   PetscFunctionBegin;
644   /* Iterate over all rows of the matrix */
645   for (j = 0; j < am; j++) {
646     dnz = onz = 0;
647     /*  Iterate over all non-zero columns of the current row */
648     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
649       /* If column is in the diagonal */
650       if (mat_j[col] >= cstart && mat_j[col] < cend) {
651         aj[diag_so_far++] = mat_j[col] - cstart;
652         dnz++;
653       } else { /* off-diagonal entries */
654         bj[offd_so_far++] = mat_j[col];
655         onz++;
656       }
657     }
658     ailen[j] = dnz;
659     bilen[j] = onz;
660   }
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 /*
665     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
666     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
667     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
668     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
669     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
670 */
671 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
672 {
673   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
674   Mat          A    = aij->A; /* diagonal part of the matrix */
675   Mat          B    = aij->B; /* offdiagonal part of the matrix */
676   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
677   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
678   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
679   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
680   PetscInt    *ailen = a->ilen, *aj = a->j;
681   PetscInt    *bilen = b->ilen, *bj = b->j;
682   PetscInt     am          = aij->A->rmap->n, j;
683   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
684   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
685   PetscScalar *aa = a->a, *ba = b->a;
686 
687   PetscFunctionBegin;
688   /* Iterate over all rows of the matrix */
689   for (j = 0; j < am; j++) {
690     dnz_row = onz_row = 0;
691     rowstart_offd     = full_offd_i[j];
692     rowstart_diag     = full_diag_i[j];
693     /*  Iterate over all non-zero columns of the current row */
694     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
695       /* If column is in the diagonal */
696       if (mat_j[col] >= cstart && mat_j[col] < cend) {
697         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
698         aa[rowstart_diag + dnz_row] = mat_a[col];
699         dnz_row++;
700       } else { /* off-diagonal entries */
701         bj[rowstart_offd + onz_row] = mat_j[col];
702         ba[rowstart_offd + onz_row] = mat_a[col];
703         onz_row++;
704       }
705     }
706     ailen[j] = dnz_row;
707     bilen[j] = onz_row;
708   }
709   PetscFunctionReturn(PETSC_SUCCESS);
710 }
711 
712 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
713 {
714   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
715   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
716   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
717 
718   PetscFunctionBegin;
719   for (i = 0; i < m; i++) {
720     if (idxm[i] < 0) continue; /* negative row */
721     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
722     if (idxm[i] >= rstart && idxm[i] < rend) {
723       row = idxm[i] - rstart;
724       for (j = 0; j < n; j++) {
725         if (idxn[j] < 0) continue; /* negative column */
726         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
727         if (idxn[j] >= cstart && idxn[j] < cend) {
728           col = idxn[j] - cstart;
729           PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
730         } else {
731           if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
732 #if defined(PETSC_USE_CTABLE)
733           PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
734           col--;
735 #else
736           col = aij->colmap[idxn[j]] - 1;
737 #endif
738           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
739           else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
740         }
741       }
742     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
743   }
744   PetscFunctionReturn(PETSC_SUCCESS);
745 }
746 
747 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
748 {
749   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
750   PetscInt    nstash, reallocs;
751 
752   PetscFunctionBegin;
753   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
754 
755   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
756   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
757   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
758   PetscFunctionReturn(PETSC_SUCCESS);
759 }
760 
761 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
762 {
763   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
764   PetscMPIInt  n;
765   PetscInt     i, j, rstart, ncols, flg;
766   PetscInt    *row, *col;
767   PetscBool    other_disassembled;
768   PetscScalar *val;
769 
770   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
771 
772   PetscFunctionBegin;
773   if (!aij->donotstash && !mat->nooffprocentries) {
774     while (1) {
775       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
776       if (!flg) break;
777 
778       for (i = 0; i < n;) {
779         /* Now identify the consecutive vals belonging to the same row */
780         for (j = i, rstart = row[j]; j < n; j++) {
781           if (row[j] != rstart) break;
782         }
783         if (j < n) ncols = j - i;
784         else ncols = n - i;
785         /* Now assemble all these values with a single function call */
786         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
787         i = j;
788       }
789     }
790     PetscCall(MatStashScatterEnd_Private(&mat->stash));
791   }
792 #if defined(PETSC_HAVE_DEVICE)
793   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
794   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
795   if (mat->boundtocpu) {
796     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
797     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
798   }
799 #endif
800   PetscCall(MatAssemblyBegin(aij->A, mode));
801   PetscCall(MatAssemblyEnd(aij->A, mode));
802 
803   /* determine if any process has disassembled; if so, we must
804      also disassemble ourselves, in order that we may reassemble. */
805   /*
806      if the nonzero structure of the submatrix B cannot change, then we know that
807      no process disassembled and thus we can skip this step
808   */
809   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
810     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
811     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
812       PetscCall(MatDisAssemble_MPIAIJ(mat));
813     }
814   }
815   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
816   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
817 #if defined(PETSC_HAVE_DEVICE)
818   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
819 #endif
820   PetscCall(MatAssemblyBegin(aij->B, mode));
821   PetscCall(MatAssemblyEnd(aij->B, mode));
822 
823   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
824 
825   aij->rowvalues = NULL;
826 
827   PetscCall(VecDestroy(&aij->diag));
828 
829   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
830   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
831     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
832     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
833   }
834 #if defined(PETSC_HAVE_DEVICE)
835   mat->offloadmask = PETSC_OFFLOAD_BOTH;
836 #endif
837   PetscFunctionReturn(PETSC_SUCCESS);
838 }
839 
840 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
841 {
842   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
843 
844   PetscFunctionBegin;
845   PetscCall(MatZeroEntries(l->A));
846   PetscCall(MatZeroEntries(l->B));
847   PetscFunctionReturn(PETSC_SUCCESS);
848 }
849 
850 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
851 {
852   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
853   PetscObjectState sA, sB;
854   PetscInt        *lrows;
855   PetscInt         r, len;
856   PetscBool        cong, lch, gch;
857 
858   PetscFunctionBegin;
859   /* get locally owned rows */
860   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
861   PetscCall(MatHasCongruentLayouts(A, &cong));
862   /* fix right hand side if needed */
863   if (x && b) {
864     const PetscScalar *xx;
865     PetscScalar       *bb;
866 
867     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
868     PetscCall(VecGetArrayRead(x, &xx));
869     PetscCall(VecGetArray(b, &bb));
870     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
871     PetscCall(VecRestoreArrayRead(x, &xx));
872     PetscCall(VecRestoreArray(b, &bb));
873   }
874 
875   sA = mat->A->nonzerostate;
876   sB = mat->B->nonzerostate;
877 
878   if (diag != 0.0 && cong) {
879     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
880     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
881   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
882     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
883     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
884     PetscInt    nnwA, nnwB;
885     PetscBool   nnzA, nnzB;
886 
887     nnwA = aijA->nonew;
888     nnwB = aijB->nonew;
889     nnzA = aijA->keepnonzeropattern;
890     nnzB = aijB->keepnonzeropattern;
891     if (!nnzA) {
892       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
893       aijA->nonew = 0;
894     }
895     if (!nnzB) {
896       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
897       aijB->nonew = 0;
898     }
899     /* Must zero here before the next loop */
900     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
901     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
902     for (r = 0; r < len; ++r) {
903       const PetscInt row = lrows[r] + A->rmap->rstart;
904       if (row >= A->cmap->N) continue;
905       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
906     }
907     aijA->nonew = nnwA;
908     aijB->nonew = nnwB;
909   } else {
910     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
911     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
912   }
913   PetscCall(PetscFree(lrows));
914   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
915   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
916 
917   /* reduce nonzerostate */
918   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
919   PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
920   if (gch) A->nonzerostate++;
921   PetscFunctionReturn(PETSC_SUCCESS);
922 }
923 
924 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
925 {
926   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
927   PetscMPIInt        n = A->rmap->n;
928   PetscInt           i, j, r, m, len = 0;
929   PetscInt          *lrows, *owners = A->rmap->range;
930   PetscMPIInt        p = 0;
931   PetscSFNode       *rrows;
932   PetscSF            sf;
933   const PetscScalar *xx;
934   PetscScalar       *bb, *mask, *aij_a;
935   Vec                xmask, lmask;
936   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
937   const PetscInt    *aj, *ii, *ridx;
938   PetscScalar       *aa;
939 
940   PetscFunctionBegin;
941   /* Create SF where leaves are input rows and roots are owned rows */
942   PetscCall(PetscMalloc1(n, &lrows));
943   for (r = 0; r < n; ++r) lrows[r] = -1;
944   PetscCall(PetscMalloc1(N, &rrows));
945   for (r = 0; r < N; ++r) {
946     const PetscInt idx = rows[r];
947     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
948     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
949       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
950     }
951     rrows[r].rank  = p;
952     rrows[r].index = rows[r] - owners[p];
953   }
954   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
955   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
956   /* Collect flags for rows to be zeroed */
957   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
958   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
959   PetscCall(PetscSFDestroy(&sf));
960   /* Compress and put in row numbers */
961   for (r = 0; r < n; ++r)
962     if (lrows[r] >= 0) lrows[len++] = r;
963   /* zero diagonal part of matrix */
964   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
965   /* handle off diagonal part of matrix */
966   PetscCall(MatCreateVecs(A, &xmask, NULL));
967   PetscCall(VecDuplicate(l->lvec, &lmask));
968   PetscCall(VecGetArray(xmask, &bb));
969   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
970   PetscCall(VecRestoreArray(xmask, &bb));
971   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
972   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
973   PetscCall(VecDestroy(&xmask));
974   if (x && b) { /* this code is buggy when the row and column layout don't match */
975     PetscBool cong;
976 
977     PetscCall(MatHasCongruentLayouts(A, &cong));
978     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
979     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
980     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
981     PetscCall(VecGetArrayRead(l->lvec, &xx));
982     PetscCall(VecGetArray(b, &bb));
983   }
984   PetscCall(VecGetArray(lmask, &mask));
985   /* remove zeroed rows of the off-diagonal matrix */
986   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
987   ii = aij->i;
988   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
989   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
990   if (aij->compressedrow.use) {
991     m    = aij->compressedrow.nrows;
992     ii   = aij->compressedrow.i;
993     ridx = aij->compressedrow.rindex;
994     for (i = 0; i < m; i++) {
995       n  = ii[i + 1] - ii[i];
996       aj = aij->j + ii[i];
997       aa = aij_a + ii[i];
998 
999       for (j = 0; j < n; j++) {
1000         if (PetscAbsScalar(mask[*aj])) {
1001           if (b) bb[*ridx] -= *aa * xx[*aj];
1002           *aa = 0.0;
1003         }
1004         aa++;
1005         aj++;
1006       }
1007       ridx++;
1008     }
1009   } else { /* do not use compressed row format */
1010     m = l->B->rmap->n;
1011     for (i = 0; i < m; i++) {
1012       n  = ii[i + 1] - ii[i];
1013       aj = aij->j + ii[i];
1014       aa = aij_a + ii[i];
1015       for (j = 0; j < n; j++) {
1016         if (PetscAbsScalar(mask[*aj])) {
1017           if (b) bb[i] -= *aa * xx[*aj];
1018           *aa = 0.0;
1019         }
1020         aa++;
1021         aj++;
1022       }
1023     }
1024   }
1025   if (x && b) {
1026     PetscCall(VecRestoreArray(b, &bb));
1027     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1028   }
1029   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1030   PetscCall(VecRestoreArray(lmask, &mask));
1031   PetscCall(VecDestroy(&lmask));
1032   PetscCall(PetscFree(lrows));
1033 
1034   /* only change matrix nonzero state if pattern was allowed to be changed */
1035   if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
1036     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1037     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1038   }
1039   PetscFunctionReturn(PETSC_SUCCESS);
1040 }
1041 
1042 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1043 {
1044   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1045   PetscInt    nt;
1046   VecScatter  Mvctx = a->Mvctx;
1047 
1048   PetscFunctionBegin;
1049   PetscCall(VecGetLocalSize(xx, &nt));
1050   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1051   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1052   PetscUseTypeMethod(a->A, mult, xx, yy);
1053   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1054   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1055   PetscFunctionReturn(PETSC_SUCCESS);
1056 }
1057 
1058 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1059 {
1060   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1064   PetscFunctionReturn(PETSC_SUCCESS);
1065 }
1066 
1067 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1068 {
1069   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1070   VecScatter  Mvctx = a->Mvctx;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1074   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1075   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1076   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1077   PetscFunctionReturn(PETSC_SUCCESS);
1078 }
1079 
1080 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1081 {
1082   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1083 
1084   PetscFunctionBegin;
1085   /* do nondiagonal part */
1086   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1087   /* do local part */
1088   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1089   /* add partial results together */
1090   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1091   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1092   PetscFunctionReturn(PETSC_SUCCESS);
1093 }
1094 
1095 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1096 {
1097   MPI_Comm    comm;
1098   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1099   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1100   IS          Me, Notme;
1101   PetscInt    M, N, first, last, *notme, i;
1102   PetscBool   lf;
1103   PetscMPIInt size;
1104 
1105   PetscFunctionBegin;
1106   /* Easy test: symmetric diagonal block */
1107   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1108   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1109   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1110   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1111   PetscCallMPI(MPI_Comm_size(comm, &size));
1112   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1113 
1114   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1115   PetscCall(MatGetSize(Amat, &M, &N));
1116   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1117   PetscCall(PetscMalloc1(N - last + first, &notme));
1118   for (i = 0; i < first; i++) notme[i] = i;
1119   for (i = last; i < M; i++) notme[i - last + first] = i;
1120   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1121   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1122   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1123   Aoff = Aoffs[0];
1124   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1125   Boff = Boffs[0];
1126   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1127   PetscCall(MatDestroyMatrices(1, &Aoffs));
1128   PetscCall(MatDestroyMatrices(1, &Boffs));
1129   PetscCall(ISDestroy(&Me));
1130   PetscCall(ISDestroy(&Notme));
1131   PetscCall(PetscFree(notme));
1132   PetscFunctionReturn(PETSC_SUCCESS);
1133 }
1134 
1135 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1136 {
1137   PetscFunctionBegin;
1138   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1139   PetscFunctionReturn(PETSC_SUCCESS);
1140 }
1141 
1142 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1143 {
1144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1145 
1146   PetscFunctionBegin;
1147   /* do nondiagonal part */
1148   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1149   /* do local part */
1150   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1151   /* add partial results together */
1152   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1153   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1154   PetscFunctionReturn(PETSC_SUCCESS);
1155 }
1156 
1157 /*
1158   This only works correctly for square matrices where the subblock a->A is the
1159   diagonal block.
1160 */
1161 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1162 {
1163   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1164 
1165   PetscFunctionBegin;
1166   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1167   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1168   PetscCall(MatGetDiagonal(a->A, v));
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1173 {
1174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1175 
1176   PetscFunctionBegin;
1177   PetscCall(MatScale(a->A, aa));
1178   PetscCall(MatScale(a->B, aa));
1179   PetscFunctionReturn(PETSC_SUCCESS);
1180 }
1181 
1182 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1183 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1184 {
1185   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1186 
1187   PetscFunctionBegin;
1188   PetscCall(PetscSFDestroy(&aij->coo_sf));
1189   PetscCall(PetscFree(aij->Aperm1));
1190   PetscCall(PetscFree(aij->Bperm1));
1191   PetscCall(PetscFree(aij->Ajmap1));
1192   PetscCall(PetscFree(aij->Bjmap1));
1193 
1194   PetscCall(PetscFree(aij->Aimap2));
1195   PetscCall(PetscFree(aij->Bimap2));
1196   PetscCall(PetscFree(aij->Aperm2));
1197   PetscCall(PetscFree(aij->Bperm2));
1198   PetscCall(PetscFree(aij->Ajmap2));
1199   PetscCall(PetscFree(aij->Bjmap2));
1200 
1201   PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
1202   PetscCall(PetscFree(aij->Cperm1));
1203   PetscFunctionReturn(PETSC_SUCCESS);
1204 }
1205 
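/*
   Sketch of the binary layout written below: a 4-entry header (MAT_FILE_CLASSID, M, N, global nonzero count),
   followed by all row lengths, then all global column indices, then all nonzero values, with each section
   written collectively across the ranks.
*/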
1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1207 {
1208   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1209   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1210   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1211   const PetscInt    *garray = aij->garray;
1212   const PetscScalar *aa, *ba;
1213   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1214   PetscInt64         nz, hnz;
1215   PetscInt          *rowlens;
1216   PetscInt          *colidxs;
1217   PetscScalar       *matvals;
1218   PetscMPIInt        rank;
1219 
1220   PetscFunctionBegin;
1221   PetscCall(PetscViewerSetUp(viewer));
1222 
1223   M  = mat->rmap->N;
1224   N  = mat->cmap->N;
1225   m  = mat->rmap->n;
1226   rs = mat->rmap->rstart;
1227   cs = mat->cmap->rstart;
1228   nz = A->nz + B->nz;
1229 
1230   /* write matrix header */
1231   header[0] = MAT_FILE_CLASSID;
1232   header[1] = M;
1233   header[2] = N;
1234   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1235   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1236   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1237   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1238 
1239   /* fill in and store row lengths  */
1240   PetscCall(PetscMalloc1(m, &rowlens));
1241   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1242   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1243   PetscCall(PetscFree(rowlens));
1244 
1245   /* fill in and store column indices */
1246   PetscCall(PetscMalloc1(nz, &colidxs));
1247   for (cnt = 0, i = 0; i < m; i++) {
1248     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1249       if (garray[B->j[jb]] > cs) break;
1250       colidxs[cnt++] = garray[B->j[jb]];
1251     }
1252     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1253     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1254   }
1255   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1256   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1257   PetscCall(PetscFree(colidxs));
1258 
1259   /* fill in and store nonzero values */
1260   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1261   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1262   PetscCall(PetscMalloc1(nz, &matvals));
1263   for (cnt = 0, i = 0; i < m; i++) {
1264     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1265       if (garray[B->j[jb]] > cs) break;
1266       matvals[cnt++] = ba[jb];
1267     }
1268     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1269     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1270   }
1271   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1272   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1273   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1274   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1275   PetscCall(PetscFree(matvals));
1276 
1277   /* write block size option to the viewer's .info file */
1278   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1279   PetscFunctionReturn(PETSC_SUCCESS);
1280 }
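/*
  The layout written above is the standard PETSc binary Mat format: a 4-entry
  header {MAT_FILE_CLASSID, M, N, nnz}, then the M row lengths, then all nnz
  column indices, then all nnz values. A file produced this way can be read
  back with MatLoad(); a minimal sketch (the filename is hypothetical):

    PetscViewer v;
    Mat         B;

    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "mat.dat", FILE_MODE_READ, &v));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
    PetscCall(MatLoad(B, v));
    PetscCall(PetscViewerDestroy(&v));
*/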
1281 
1282 #include <petscdraw.h>
1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1284 {
1285   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1286   PetscMPIInt       rank = aij->rank, size = aij->size;
1287   PetscBool         isdraw, iascii, isbinary;
1288   PetscViewer       sviewer;
1289   PetscViewerFormat format;
1290 
1291   PetscFunctionBegin;
1292   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1293   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1294   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1295   if (iascii) {
1296     PetscCall(PetscViewerGetFormat(viewer, &format));
1297     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1298       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
1299       PetscCall(PetscMalloc1(size, &nz));
1300       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1301       for (i = 0; i < (PetscInt)size; i++) {
1302         nmax = PetscMax(nmax, nz[i]);
1303         nmin = PetscMin(nmin, nz[i]);
1304         navg += nz[i];
1305       }
1306       PetscCall(PetscFree(nz));
1307       navg = navg / size;
1308       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1309       PetscFunctionReturn(PETSC_SUCCESS);
1310     }
1311     PetscCall(PetscViewerGetFormat(viewer, &format));
1312     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1313       MatInfo   info;
1314       PetscInt *inodes = NULL;
1315 
1316       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1317       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1318       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1319       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1320       if (!inodes) {
1321         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1322                                                      (double)info.memory));
1323       } else {
1324         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1325                                                      (double)info.memory));
1326       }
1327       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1328       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1329       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1330       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1331       PetscCall(PetscViewerFlush(viewer));
1332       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1333       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1334       PetscCall(VecScatterView(aij->Mvctx, viewer));
1335       PetscFunctionReturn(PETSC_SUCCESS);
1336     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1337       PetscInt inodecount, inodelimit, *inodes;
1338       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1339       if (inodes) {
1340         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1341       } else {
1342         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1343       }
1344       PetscFunctionReturn(PETSC_SUCCESS);
1345     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1346       PetscFunctionReturn(PETSC_SUCCESS);
1347     }
1348   } else if (isbinary) {
1349     if (size == 1) {
1350       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1351       PetscCall(MatView(aij->A, viewer));
1352     } else {
1353       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1354     }
1355     PetscFunctionReturn(PETSC_SUCCESS);
1356   } else if (iascii && size == 1) {
1357     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1358     PetscCall(MatView(aij->A, viewer));
1359     PetscFunctionReturn(PETSC_SUCCESS);
1360   } else if (isdraw) {
1361     PetscDraw draw;
1362     PetscBool isnull;
1363     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1364     PetscCall(PetscDrawIsNull(draw, &isnull));
1365     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1366   }
1367 
1368   { /* assemble the entire matrix onto first processor */
1369     Mat A = NULL, Av;
1370     IS  isrow, iscol;
1371 
1372     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1373     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1374     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1375     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1376     /* The commented-out code below uses MatCreateSubMatrices() instead */
1377     /*
1378     Mat *AA, A = NULL, Av;
1379     IS  isrow,iscol;
1380 
1381     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1382     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1383     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1384     if (rank == 0) {
1385        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1386        A    = AA[0];
1387        Av   = AA[0];
1388     }
1389     PetscCall(MatDestroySubMatrices(1,&AA));
1390 */
1391     PetscCall(ISDestroy(&iscol));
1392     PetscCall(ISDestroy(&isrow));
1393     /*
1394        Everyone has to call to draw the matrix since the graphics waits are
1395        synchronized across all processors that share the PetscDraw object
1396     */
1397     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1398     if (rank == 0) {
1399       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1400       PetscCall(MatView_SeqAIJ(Av, sviewer));
1401     }
1402     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1403     PetscCall(PetscViewerFlush(viewer));
1404     PetscCall(MatDestroy(&A));
1405   }
1406   PetscFunctionReturn(PETSC_SUCCESS);
1407 }
1408 
1409 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1410 {
1411   PetscBool iascii, isdraw, issocket, isbinary;
1412 
1413   PetscFunctionBegin;
1414   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1415   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1418   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1419   PetscFunctionReturn(PETSC_SUCCESS);
1420 }
1421 
1422 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1423 {
1424   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1425   Vec         bb1 = NULL;
1426   PetscBool   hasop;
1427 
1428   PetscFunctionBegin;
1429   if (flag == SOR_APPLY_UPPER) {
1430     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1431     PetscFunctionReturn(PETSC_SUCCESS);
1432   }
1433 
1434   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1435 
1436   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1437     if (flag & SOR_ZERO_INITIAL_GUESS) {
1438       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1439       its--;
1440     }
1441 
1442     while (its--) {
1443       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1444       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1445 
1446       /* update rhs: bb1 = bb - B*x */
1447       PetscCall(VecScale(mat->lvec, -1.0));
1448       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1449 
1450       /* local sweep */
1451       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1452     }
1453   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1454     if (flag & SOR_ZERO_INITIAL_GUESS) {
1455       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1456       its--;
1457     }
1458     while (its--) {
1459       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1460       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1461 
1462       /* update rhs: bb1 = bb - B*x */
1463       PetscCall(VecScale(mat->lvec, -1.0));
1464       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1465 
1466       /* local sweep */
1467       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1468     }
1469   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1470     if (flag & SOR_ZERO_INITIAL_GUESS) {
1471       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1472       its--;
1473     }
1474     while (its--) {
1475       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1476       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1477 
1478       /* update rhs: bb1 = bb - B*x */
1479       PetscCall(VecScale(mat->lvec, -1.0));
1480       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1481 
1482       /* local sweep */
1483       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1484     }
1485   } else if (flag & SOR_EISENSTAT) {
1486     Vec xx1;
1487 
1488     PetscCall(VecDuplicate(bb, &xx1));
1489     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1490 
1491     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1492     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1493     if (!mat->diag) {
1494       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1495       PetscCall(MatGetDiagonal(matin, mat->diag));
1496     }
1497     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1498     if (hasop) {
1499       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1500     } else {
1501       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1502     }
1503     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1504 
1505     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1506 
1507     /* local sweep */
1508     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1509     PetscCall(VecAXPY(xx, 1.0, xx1));
1510     PetscCall(VecDestroy(&xx1));
1511   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1512 
1513   PetscCall(VecDestroy(&bb1));
1514 
1515   matin->factorerrortype = mat->A->factorerrortype;
1516   PetscFunctionReturn(PETSC_SUCCESS);
1517 }
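/*
  Each local sweep above uses only the diagonal block: the scatter brings in
  the ghost values of x, the off-diagonal block updates the right-hand side
  (bb1 = bb - B*x), and SOR runs on mat->A alone. A sketch of reaching this
  routine through the usual preconditioner interface (ksp, b, x assumed set up):

    PC pc;

    PetscCall(KSPGetPC(ksp, &pc));
    PetscCall(PCSetType(pc, PCSOR));
    PetscCall(KSPSolve(ksp, b, x));
*/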
1518 
1519 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1520 {
1521   Mat             aA, aB, Aperm;
1522   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1523   PetscScalar    *aa, *ba;
1524   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1525   PetscSF         rowsf, sf;
1526   IS              parcolp = NULL;
1527   PetscBool       done;
1528 
1529   PetscFunctionBegin;
1530   PetscCall(MatGetLocalSize(A, &m, &n));
1531   PetscCall(ISGetIndices(rowp, &rwant));
1532   PetscCall(ISGetIndices(colp, &cwant));
1533   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1534 
1535   /* Invert row permutation to find out where my rows should go */
1536   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1537   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1538   PetscCall(PetscSFSetFromOptions(rowsf));
1539   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1540   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1541   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1542 
1543   /* Invert column permutation to find out where my columns should go */
1544   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1545   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1546   PetscCall(PetscSFSetFromOptions(sf));
1547   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1548   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1549   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1550   PetscCall(PetscSFDestroy(&sf));
1551 
1552   PetscCall(ISRestoreIndices(rowp, &rwant));
1553   PetscCall(ISRestoreIndices(colp, &cwant));
1554   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1555 
1556   /* Find out where my gcols should go */
1557   PetscCall(MatGetSize(aB, NULL, &ng));
1558   PetscCall(PetscMalloc1(ng, &gcdest));
1559   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1560   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1561   PetscCall(PetscSFSetFromOptions(sf));
1562   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1563   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1564   PetscCall(PetscSFDestroy(&sf));
1565 
1566   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1567   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1568   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1569   for (i = 0; i < m; i++) {
1570     PetscInt    row = rdest[i];
1571     PetscMPIInt rowner;
1572     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1573     for (j = ai[i]; j < ai[i + 1]; j++) {
1574       PetscInt    col = cdest[aj[j]];
1575       PetscMPIInt cowner;
1576       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1577       if (rowner == cowner) dnnz[i]++;
1578       else onnz[i]++;
1579     }
1580     for (j = bi[i]; j < bi[i + 1]; j++) {
1581       PetscInt    col = gcdest[bj[j]];
1582       PetscMPIInt cowner;
1583       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1584       if (rowner == cowner) dnnz[i]++;
1585       else onnz[i]++;
1586     }
1587   }
1588   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1589   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1590   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1591   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1592   PetscCall(PetscSFDestroy(&rowsf));
1593 
1594   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1595   PetscCall(MatSeqAIJGetArray(aA, &aa));
1596   PetscCall(MatSeqAIJGetArray(aB, &ba));
1597   for (i = 0; i < m; i++) {
1598     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1599     PetscInt  j0, rowlen;
1600     rowlen = ai[i + 1] - ai[i];
1601     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than the scratch array length m, so insert in batches */
1602       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1603       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1604     }
1605     rowlen = bi[i + 1] - bi[i];
1606     for (j0 = j = 0; j < rowlen; j0 = j) {
1607       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1608       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1609     }
1610   }
1611   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1612   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1613   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1614   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1615   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1616   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1617   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1618   PetscCall(PetscFree3(work, rdest, cdest));
1619   PetscCall(PetscFree(gcdest));
1620   if (parcolp) PetscCall(ISDestroy(&colp));
1621   *B = Aperm;
1622   PetscFunctionReturn(PETSC_SUCCESS);
1623 }
1624 
1625 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1626 {
1627   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1628 
1629   PetscFunctionBegin;
1630   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1631   if (ghosts) *ghosts = aij->garray;
1632   PetscFunctionReturn(PETSC_SUCCESS);
1633 }
1634 
1635 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1636 {
1637   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1638   Mat            A = mat->A, B = mat->B;
1639   PetscLogDouble isend[5], irecv[5];
1640 
1641   PetscFunctionBegin;
1642   info->block_size = 1.0;
1643   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1644 
1645   isend[0] = info->nz_used;
1646   isend[1] = info->nz_allocated;
1647   isend[2] = info->nz_unneeded;
1648   isend[3] = info->memory;
1649   isend[4] = info->mallocs;
1650 
1651   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1652 
1653   isend[0] += info->nz_used;
1654   isend[1] += info->nz_allocated;
1655   isend[2] += info->nz_unneeded;
1656   isend[3] += info->memory;
1657   isend[4] += info->mallocs;
1658   if (flag == MAT_LOCAL) {
1659     info->nz_used      = isend[0];
1660     info->nz_allocated = isend[1];
1661     info->nz_unneeded  = isend[2];
1662     info->memory       = isend[3];
1663     info->mallocs      = isend[4];
1664   } else if (flag == MAT_GLOBAL_MAX) {
1665     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1666 
1667     info->nz_used      = irecv[0];
1668     info->nz_allocated = irecv[1];
1669     info->nz_unneeded  = irecv[2];
1670     info->memory       = irecv[3];
1671     info->mallocs      = irecv[4];
1672   } else if (flag == MAT_GLOBAL_SUM) {
1673     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1674 
1675     info->nz_used      = irecv[0];
1676     info->nz_allocated = irecv[1];
1677     info->nz_unneeded  = irecv[2];
1678     info->memory       = irecv[3];
1679     info->mallocs      = irecv[4];
1680   }
1681   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1682   info->fill_ratio_needed = 0;
1683   info->factor_mallocs    = 0;
1684   PetscFunctionReturn(PETSC_SUCCESS);
1685 }
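/*
  Usage sketch: the three MatInfoType values dispatch to the branches above;
  for example, global totals across all ranks of an assembled matrix A:

    MatInfo info;

    PetscCall(MatGetInfo(A, MAT_GLOBAL_SUM, &info));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "nz used %g allocated %g mallocs %g\n", info.nz_used, info.nz_allocated, info.mallocs));
*/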
1686 
1687 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1688 {
1689   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1690 
1691   PetscFunctionBegin;
1692   switch (op) {
1693   case MAT_NEW_NONZERO_LOCATIONS:
1694   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1695   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1696   case MAT_KEEP_NONZERO_PATTERN:
1697   case MAT_NEW_NONZERO_LOCATION_ERR:
1698   case MAT_USE_INODES:
1699   case MAT_IGNORE_ZERO_ENTRIES:
1700   case MAT_FORM_EXPLICIT_TRANSPOSE:
1701     MatCheckPreallocated(A, 1);
1702     PetscCall(MatSetOption(a->A, op, flg));
1703     PetscCall(MatSetOption(a->B, op, flg));
1704     break;
1705   case MAT_ROW_ORIENTED:
1706     MatCheckPreallocated(A, 1);
1707     a->roworiented = flg;
1708 
1709     PetscCall(MatSetOption(a->A, op, flg));
1710     PetscCall(MatSetOption(a->B, op, flg));
1711     break;
1712   case MAT_FORCE_DIAGONAL_ENTRIES:
1713   case MAT_SORTED_FULL:
1714     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1715     break;
1716   case MAT_IGNORE_OFF_PROC_ENTRIES:
1717     a->donotstash = flg;
1718     break;
1719   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1720   case MAT_SPD:
1721   case MAT_SYMMETRIC:
1722   case MAT_STRUCTURALLY_SYMMETRIC:
1723   case MAT_HERMITIAN:
1724   case MAT_SYMMETRY_ETERNAL:
1725   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1726   case MAT_SPD_ETERNAL:
1727     /* if the diagonal block a->A is square it inherits some of the properties above */
1728     break;
1729   case MAT_SUBMAT_SINGLEIS:
1730     A->submat_singleis = flg;
1731     break;
1732   case MAT_STRUCTURE_ONLY:
1733     /* The option is handled directly by MatSetOption() */
1734     break;
1735   default:
1736     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1737   }
1738   PetscFunctionReturn(PETSC_SUCCESS);
1739 }
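/*
  Sketch of options commonly routed through the switch above; the first is
  forwarded to both local blocks, the second only flips the donotstash flag:

    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
    PetscCall(MatSetOption(A, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE));
*/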
1740 
1741 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1742 {
1743   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1744   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1745   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1746   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1747   PetscInt    *cmap, *idx_p;
1748 
1749   PetscFunctionBegin;
1750   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1751   mat->getrowactive = PETSC_TRUE;
1752 
1753   if (!mat->rowvalues && (idx || v)) {
1754     /*
1755         allocate enough space to hold information from the longest row.
1756     */
1757     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1758     PetscInt    max = 1, tmp;
1759     for (i = 0; i < matin->rmap->n; i++) {
1760       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1761       if (max < tmp) max = tmp;
1762     }
1763     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1764   }
1765 
1766   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1767   lrow = row - rstart;
1768 
1769   pvA = &vworkA;
1770   pcA = &cworkA;
1771   pvB = &vworkB;
1772   pcB = &cworkB;
1773   if (!v) {
1774     pvA = NULL;
1775     pvB = NULL;
1776   }
1777   if (!idx) {
1778     pcA = NULL;
1779     if (!v) pcB = NULL;
1780   }
1781   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1782   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1783   nztot = nzA + nzB;
1784 
1785   cmap = mat->garray;
1786   if (v || idx) {
1787     if (nztot) {
1788       /* Sort by increasing column numbers, assuming A and B already sorted */
1789       PetscInt imark = -1;
1790       if (v) {
1791         *v = v_p = mat->rowvalues;
1792         for (i = 0; i < nzB; i++) {
1793           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1794           else break;
1795         }
1796         imark = i;
1797         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1798         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1799       }
1800       if (idx) {
1801         *idx = idx_p = mat->rowindices;
1802         if (imark > -1) {
1803           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1804         } else {
1805           for (i = 0; i < nzB; i++) {
1806             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1807             else break;
1808           }
1809           imark = i;
1810         }
1811         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1812         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1813       }
1814     } else {
1815       if (idx) *idx = NULL;
1816       if (v) *v = NULL;
1817     }
1818   }
1819   *nz = nztot;
1820   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1821   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1822   PetscFunctionReturn(PETSC_SUCCESS);
1823 }
1824 
1825 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1826 {
1827   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1828 
1829   PetscFunctionBegin;
1830   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1831   aij->getrowactive = PETSC_FALSE;
1832   PetscFunctionReturn(PETSC_SUCCESS);
1833 }
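/*
  Sketch of the calling pattern these two routines support; only one row may
  be active at a time, so each MatGetRow() is paired with MatRestoreRow():

    PetscInt           rstart, rend, ncols;
    const PetscInt    *cols;
    const PetscScalar *vals;

    PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
    for (PetscInt row = rstart; row < rend; row++) {
      PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
      ... use cols[0..ncols) and vals[0..ncols) here ...
      PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
    }
*/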
1834 
1835 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1836 {
1837   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1838   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1839   PetscInt         i, j, cstart = mat->cmap->rstart;
1840   PetscReal        sum = 0.0;
1841   const MatScalar *v, *amata, *bmata;
1842 
1843   PetscFunctionBegin;
1844   if (aij->size == 1) {
1845     PetscCall(MatNorm(aij->A, type, norm));
1846   } else {
1847     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1848     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1849     if (type == NORM_FROBENIUS) {
1850       v = amata;
1851       for (i = 0; i < amat->nz; i++) {
1852         sum += PetscRealPart(PetscConj(*v) * (*v));
1853         v++;
1854       }
1855       v = bmata;
1856       for (i = 0; i < bmat->nz; i++) {
1857         sum += PetscRealPart(PetscConj(*v) * (*v));
1858         v++;
1859       }
1860       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1861       *norm = PetscSqrtReal(*norm);
1862       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1863     } else if (type == NORM_1) { /* max column norm */
1864       PetscReal *tmp, *tmp2;
1865       PetscInt  *jj, *garray = aij->garray;
1866       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1867       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1868       *norm = 0.0;
1869       v     = amata;
1870       jj    = amat->j;
1871       for (j = 0; j < amat->nz; j++) {
1872         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1873         v++;
1874       }
1875       v  = bmata;
1876       jj = bmat->j;
1877       for (j = 0; j < bmat->nz; j++) {
1878         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1879         v++;
1880       }
1881       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1882       for (j = 0; j < mat->cmap->N; j++) {
1883         if (tmp2[j] > *norm) *norm = tmp2[j];
1884       }
1885       PetscCall(PetscFree(tmp));
1886       PetscCall(PetscFree(tmp2));
1887       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1888     } else if (type == NORM_INFINITY) { /* max row norm */
1889       PetscReal ntemp = 0.0;
1890       for (j = 0; j < aij->A->rmap->n; j++) {
1891         v   = amata + amat->i[j];
1892         sum = 0.0;
1893         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1894           sum += PetscAbsScalar(*v);
1895           v++;
1896         }
1897         v = bmata + bmat->i[j];
1898         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1899           sum += PetscAbsScalar(*v);
1900           v++;
1901         }
1902         if (sum > ntemp) ntemp = sum;
1903       }
1904       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1905       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1906     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1907     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1908     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1909   }
1910   PetscFunctionReturn(PETSC_SUCCESS);
1911 }
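/*
  The reductions above compute, in the notation of the dense definitions:
    NORM_FROBENIUS: ||A||_F   = sqrt(sum_{i,j} |a_{ij}|^2)  (local sums, MPI sum, then sqrt)
    NORM_1:         ||A||_1   = max_j sum_i |a_{ij}|        (column sums, MPI sum, then max)
    NORM_INFINITY:  ||A||_inf = max_i sum_j |a_{ij}|        (row sums, then MPI max)
*/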
1912 
1913 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1914 {
1915   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1916   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1917   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1918   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1919   Mat              B, A_diag, *B_diag;
1920   const MatScalar *pbv, *bv;
1921 
1922   PetscFunctionBegin;
1923   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1924   ma = A->rmap->n;
1925   na = A->cmap->n;
1926   mb = a->B->rmap->n;
1927   nb = a->B->cmap->n;
1928   ai = Aloc->i;
1929   aj = Aloc->j;
1930   bi = Bloc->i;
1931   bj = Bloc->j;
1932   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1933     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1934     PetscSFNode         *oloc;
1935     PETSC_UNUSED PetscSF sf;
1936 
1937     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1938     /* compute d_nnz for preallocation */
1939     PetscCall(PetscArrayzero(d_nnz, na));
1940     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1941     /* compute local off-diagonal contributions */
1942     PetscCall(PetscArrayzero(g_nnz, nb));
1943     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1944     /* map those to global */
1945     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1946     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1947     PetscCall(PetscSFSetFromOptions(sf));
1948     PetscCall(PetscArrayzero(o_nnz, na));
1949     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1950     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1951     PetscCall(PetscSFDestroy(&sf));
1952 
1953     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1954     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1955     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1956     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1957     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1958     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1959   } else {
1960     B = *matout;
1961     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1962   }
1963 
1964   b           = (Mat_MPIAIJ *)B->data;
1965   A_diag      = a->A;
1966   B_diag      = &b->A;
1967   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1968   A_diag_ncol = A_diag->cmap->N;
1969   B_diag_ilen = sub_B_diag->ilen;
1970   B_diag_i    = sub_B_diag->i;
1971 
1972   /* Set ilen for diagonal of B */
1973   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1974 
1975   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1976   very quickly (i.e., without using MatSetValues), because all writes are local. */
1977   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1978   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1979 
1980   /* copy over the B part */
1981   PetscCall(PetscMalloc1(bi[mb], &cols));
1982   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1983   pbv = bv;
1984   row = A->rmap->rstart;
1985   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1986   cols_tmp = cols;
1987   for (i = 0; i < mb; i++) {
1988     ncol = bi[i + 1] - bi[i];
1989     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1990     row++;
1991     pbv += ncol;
1992     cols_tmp += ncol;
1993   }
1994   PetscCall(PetscFree(cols));
1995   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1996 
1997   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1998   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1999   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2000     *matout = B;
2001   } else {
2002     PetscCall(MatHeaderMerge(A, &B));
2003   }
2004   PetscFunctionReturn(PETSC_SUCCESS);
2005 }
2006 
2007 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
2008 {
2009   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2010   Mat         a = aij->A, b = aij->B;
2011   PetscInt    s1, s2, s3;
2012 
2013   PetscFunctionBegin;
2014   PetscCall(MatGetLocalSize(mat, &s2, &s3));
2015   if (rr) {
2016     PetscCall(VecGetLocalSize(rr, &s1));
2017     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
2018     /* Overlap communication with computation. */
2019     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2020   }
2021   if (ll) {
2022     PetscCall(VecGetLocalSize(ll, &s1));
2023     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2024     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
2025   }
2026   /* scale the diagonal block */
2027   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2028 
2029   if (rr) {
2030     /* Do a scatter end and then right scale the off-diagonal block */
2031     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2032     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2033   }
2034   PetscFunctionReturn(PETSC_SUCCESS);
2035 }
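/*
  In matrix terms the routine computes A <- diag(ll) * A * diag(rr): the
  diagonal block is scaled on both sides locally, while the off-diagonal
  block is scaled by ll locally and, for its compressed columns, by the
  scattered ghost entries of rr held in aij->lvec.
*/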
2036 
2037 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2038 {
2039   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2040 
2041   PetscFunctionBegin;
2042   PetscCall(MatSetUnfactored(a->A));
2043   PetscFunctionReturn(PETSC_SUCCESS);
2044 }
2045 
2046 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2047 {
2048   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2049   Mat         a, b, c, d;
2050   PetscBool   flg;
2051 
2052   PetscFunctionBegin;
2053   a = matA->A;
2054   b = matA->B;
2055   c = matB->A;
2056   d = matB->B;
2057 
2058   PetscCall(MatEqual(a, c, &flg));
2059   if (flg) PetscCall(MatEqual(b, d, &flg));
2060   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2061   PetscFunctionReturn(PETSC_SUCCESS);
2062 }
2063 
2064 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2065 {
2066   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2067   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2068 
2069   PetscFunctionBegin;
2070   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2071   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2072     /* because of the column compression in the off-processor part of the matrix a->B,
2073        the number of columns in a->B and b->B may be different, hence we cannot call
2074        the MatCopy() directly on the two parts. If need be, we can provide a more
2075        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2076        then copying the submatrices */
2077     PetscCall(MatCopy_Basic(A, B, str));
2078   } else {
2079     PetscCall(MatCopy(a->A, b->A, str));
2080     PetscCall(MatCopy(a->B, b->B, str));
2081   }
2082   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2083   PetscFunctionReturn(PETSC_SUCCESS);
2084 }
2085 
2086 /*
2087    Computes the number of nonzeros per row needed for preallocation when X and Y
2088    have different nonzero structure.
2089 */
2090 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2091 {
2092   PetscInt i, j, k, nzx, nzy;
2093 
2094   PetscFunctionBegin;
2095   /* Set the number of nonzeros in the new matrix */
2096   for (i = 0; i < m; i++) {
2097     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2098     nzx    = xi[i + 1] - xi[i];
2099     nzy    = yi[i + 1] - yi[i];
2100     nnz[i] = 0;
2101     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2102       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2103       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2104       nnz[i]++;
2105     }
2106     for (; k < nzy; k++) nnz[i]++;
2107   }
2108   PetscFunctionReturn(PETSC_SUCCESS);
2109 }
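/*
  Worked example of the merge count above for one row: if X has global
  columns {0, 3} and Y has {3, 5}, the loop counts 0 (X only), 3 (shared,
  counted once via the duplicate skip), and 5 (Y only), so nnz[i] = 3.
*/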
2110 
2111 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2112 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2113 {
2114   PetscInt    m = Y->rmap->N;
2115   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2116   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2117 
2118   PetscFunctionBegin;
2119   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2120   PetscFunctionReturn(PETSC_SUCCESS);
2121 }
2122 
2123 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2124 {
2125   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2126 
2127   PetscFunctionBegin;
2128   if (str == SAME_NONZERO_PATTERN) {
2129     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2130     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2131   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2132     PetscCall(MatAXPY_Basic(Y, a, X, str));
2133   } else {
2134     Mat       B;
2135     PetscInt *nnz_d, *nnz_o;
2136 
2137     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2138     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2139     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2140     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2141     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2142     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2143     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2144     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2145     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2146     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2147     PetscCall(MatHeaderMerge(Y, &B));
2148     PetscCall(PetscFree(nnz_d));
2149     PetscCall(PetscFree(nnz_o));
2150   }
2151   PetscFunctionReturn(PETSC_SUCCESS);
2152 }
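/*
  Usage sketch: Y <- a*X + Y. The first branch above is the cheap case;
  passing DIFFERENT_NONZERO_PATTERN triggers the preallocate-and-merge path:

    PetscCall(MatAXPY(Y, 2.0, X, SAME_NONZERO_PATTERN));
    PetscCall(MatAXPY(Y, 2.0, X, DIFFERENT_NONZERO_PATTERN));
*/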
2153 
2154 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2155 
2156 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2157 {
2158   PetscFunctionBegin;
2159   if (PetscDefined(USE_COMPLEX)) {
2160     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2161 
2162     PetscCall(MatConjugate_SeqAIJ(aij->A));
2163     PetscCall(MatConjugate_SeqAIJ(aij->B));
2164   }
2165   PetscFunctionReturn(PETSC_SUCCESS);
2166 }
2167 
2168 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2169 {
2170   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2171 
2172   PetscFunctionBegin;
2173   PetscCall(MatRealPart(a->A));
2174   PetscCall(MatRealPart(a->B));
2175   PetscFunctionReturn(PETSC_SUCCESS);
2176 }
2177 
2178 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2179 {
2180   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2181 
2182   PetscFunctionBegin;
2183   PetscCall(MatImaginaryPart(a->A));
2184   PetscCall(MatImaginaryPart(a->B));
2185   PetscFunctionReturn(PETSC_SUCCESS);
2186 }
2187 
2188 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2189 {
2190   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2191   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2192   PetscScalar       *va, *vv;
2193   Vec                vB, vA;
2194   const PetscScalar *vb;
2195 
2196   PetscFunctionBegin;
2197   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2198   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2199 
2200   PetscCall(VecGetArrayWrite(vA, &va));
2201   if (idx) {
2202     for (i = 0; i < m; i++) {
2203       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2204     }
2205   }
2206 
2207   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2208   PetscCall(PetscMalloc1(m, &idxb));
2209   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2210 
2211   PetscCall(VecGetArrayWrite(v, &vv));
2212   PetscCall(VecGetArrayRead(vB, &vb));
2213   for (i = 0; i < m; i++) {
2214     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2215       vv[i] = vb[i];
2216       if (idx) idx[i] = a->garray[idxb[i]];
2217     } else {
2218       vv[i] = va[i];
2219       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2220     }
2221   }
2222   PetscCall(VecRestoreArrayWrite(v, &vv));
2223   PetscCall(VecRestoreArrayWrite(vA, &va));
2224   PetscCall(VecRestoreArrayRead(vB, &vb));
2225   PetscCall(PetscFree(idxb));
2226   PetscCall(VecDestroy(&vA));
2227   PetscCall(VecDestroy(&vB));
2228   PetscFunctionReturn(PETSC_SUCCESS);
2229 }
2230 
2231 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2232 {
2233   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2234   PetscInt           m = A->rmap->n, n = A->cmap->n;
2235   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2236   PetscInt          *cmap = mat->garray;
2237   PetscInt          *diagIdx, *offdiagIdx;
2238   Vec                diagV, offdiagV;
2239   PetscScalar       *a, *diagA, *offdiagA;
2240   const PetscScalar *ba, *bav;
2241   PetscInt           r, j, col, ncols, *bi, *bj;
2242   Mat                B = mat->B;
2243   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2244 
2245   PetscFunctionBegin;
2246   /* When a process holds entire A and other processes have no entry */
2247   if (A->cmap->N == n) {
2248     PetscCall(VecGetArrayWrite(v, &diagA));
2249     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2250     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2251     PetscCall(VecDestroy(&diagV));
2252     PetscCall(VecRestoreArrayWrite(v, &diagA));
2253     PetscFunctionReturn(PETSC_SUCCESS);
2254   } else if (n == 0) {
2255     if (m) {
2256       PetscCall(VecGetArrayWrite(v, &a));
2257       for (r = 0; r < m; r++) {
2258         a[r] = 0.0;
2259         if (idx) idx[r] = -1;
2260       }
2261       PetscCall(VecRestoreArrayWrite(v, &a));
2262     }
2263     PetscFunctionReturn(PETSC_SUCCESS);
2264   }
2265 
2266   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2267   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2268   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2269   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2270 
2271   /* Get offdiagIdx[] for implicit 0.0 */
2272   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2273   ba = bav;
2274   bi = b->i;
2275   bj = b->j;
2276   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2277   for (r = 0; r < m; r++) {
2278     ncols = bi[r + 1] - bi[r];
2279     if (ncols == A->cmap->N - n) { /* Brow is dense */
2280       offdiagA[r]   = *ba;
2281       offdiagIdx[r] = cmap[0];
2282     } else { /* Brow is sparse, so we already KNOW the minimum in absolute value is 0.0 (there is an implicit zero) */
2283       offdiagA[r] = 0.0;
2284 
2285       /* Find first hole in the cmap */
2286       for (j = 0; j < ncols; j++) {
2287         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2288         if (col > j && j < cstart) {
2289           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2290           break;
2291         } else if (col > j + n && j >= cstart) {
2292           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2293           break;
2294         }
2295       }
2296       if (j == ncols && ncols < A->cmap->N - n) {
2297         /* a hole is outside compressed Bcols */
2298         if (ncols == 0) {
2299           if (cstart) {
2300             offdiagIdx[r] = 0;
2301           } else offdiagIdx[r] = cend;
2302         } else { /* ncols > 0 */
2303           offdiagIdx[r] = cmap[ncols - 1] + 1;
2304           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2305         }
2306       }
2307     }
2308 
2309     for (j = 0; j < ncols; j++) {
2310       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2311         offdiagA[r]   = *ba;
2312         offdiagIdx[r] = cmap[*bj];
2313       }
2314       ba++;
2315       bj++;
2316     }
2317   }
2318 
2319   PetscCall(VecGetArrayWrite(v, &a));
2320   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2321   for (r = 0; r < m; ++r) {
2322     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2323       a[r] = diagA[r];
2324       if (idx) idx[r] = cstart + diagIdx[r];
2325     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2326       a[r] = diagA[r];
2327       if (idx) {
2328         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2329           idx[r] = cstart + diagIdx[r];
2330         } else idx[r] = offdiagIdx[r];
2331       }
2332     } else {
2333       a[r] = offdiagA[r];
2334       if (idx) idx[r] = offdiagIdx[r];
2335     }
2336   }
2337   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2338   PetscCall(VecRestoreArrayWrite(v, &a));
2339   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2340   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2341   PetscCall(VecDestroy(&diagV));
2342   PetscCall(VecDestroy(&offdiagV));
2343   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2344   PetscFunctionReturn(PETSC_SUCCESS);
2345 }
2346 
2347 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2348 {
2349   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2350   PetscInt           m = A->rmap->n, n = A->cmap->n;
2351   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2352   PetscInt          *cmap = mat->garray;
2353   PetscInt          *diagIdx, *offdiagIdx;
2354   Vec                diagV, offdiagV;
2355   PetscScalar       *a, *diagA, *offdiagA;
2356   const PetscScalar *ba, *bav;
2357   PetscInt           r, j, col, ncols, *bi, *bj;
2358   Mat                B = mat->B;
2359   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2360 
2361   PetscFunctionBegin;
2362   /* When a process holds entire A and other processes have no entry */
2363   if (A->cmap->N == n) {
2364     PetscCall(VecGetArrayWrite(v, &diagA));
2365     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2366     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2367     PetscCall(VecDestroy(&diagV));
2368     PetscCall(VecRestoreArrayWrite(v, &diagA));
2369     PetscFunctionReturn(PETSC_SUCCESS);
2370   } else if (n == 0) {
2371     if (m) {
2372       PetscCall(VecGetArrayWrite(v, &a));
2373       for (r = 0; r < m; r++) {
2374         a[r] = PETSC_MAX_REAL;
2375         if (idx) idx[r] = -1;
2376       }
2377       PetscCall(VecRestoreArrayWrite(v, &a));
2378     }
2379     PetscFunctionReturn(PETSC_SUCCESS);
2380   }
2381 
2382   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2383   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2384   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2385   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2386 
2387   /* Get offdiagIdx[] for implicit 0.0 */
2388   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2389   ba = bav;
2390   bi = b->i;
2391   bj = b->j;
2392   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2393   for (r = 0; r < m; r++) {
2394     ncols = bi[r + 1] - bi[r];
2395     if (ncols == A->cmap->N - n) { /* Brow is dense */
2396       offdiagA[r]   = *ba;
2397       offdiagIdx[r] = cmap[0];
2398     } else { /* Brow is sparse, so we already KNOW the minimum is 0.0 or lower (there is an implicit zero) */
2399       offdiagA[r] = 0.0;
2400 
2401       /* Find first hole in the cmap */
2402       for (j = 0; j < ncols; j++) {
2403         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2404         if (col > j && j < cstart) {
2405           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2406           break;
2407         } else if (col > j + n && j >= cstart) {
2408           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2409           break;
2410         }
2411       }
2412       if (j == ncols && ncols < A->cmap->N - n) {
2413         /* a hole is outside compressed Bcols */
2414         if (ncols == 0) {
2415           if (cstart) {
2416             offdiagIdx[r] = 0;
2417           } else offdiagIdx[r] = cend;
2418         } else { /* ncols > 0 */
2419           offdiagIdx[r] = cmap[ncols - 1] + 1;
2420           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2421         }
2422       }
2423     }
2424 
2425     for (j = 0; j < ncols; j++) {
2426       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2427         offdiagA[r]   = *ba;
2428         offdiagIdx[r] = cmap[*bj];
2429       }
2430       ba++;
2431       bj++;
2432     }
2433   }
2434 
2435   PetscCall(VecGetArrayWrite(v, &a));
2436   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2437   for (r = 0; r < m; ++r) {
2438     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2439       a[r] = diagA[r];
2440       if (idx) idx[r] = cstart + diagIdx[r];
2441     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2442       a[r] = diagA[r];
2443       if (idx) {
2444         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2445           idx[r] = cstart + diagIdx[r];
2446         } else idx[r] = offdiagIdx[r];
2447       }
2448     } else {
2449       a[r] = offdiagA[r];
2450       if (idx) idx[r] = offdiagIdx[r];
2451     }
2452   }
2453   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2454   PetscCall(VecRestoreArrayWrite(v, &a));
2455   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2456   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2457   PetscCall(VecDestroy(&diagV));
2458   PetscCall(VecDestroy(&offdiagV));
2459   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2460   PetscFunctionReturn(PETSC_SUCCESS);
2461 }
2462 
2463 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2464 {
2465   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2466   PetscInt           m = A->rmap->n, n = A->cmap->n;
2467   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2468   PetscInt          *cmap = mat->garray;
2469   PetscInt          *diagIdx, *offdiagIdx;
2470   Vec                diagV, offdiagV;
2471   PetscScalar       *a, *diagA, *offdiagA;
2472   const PetscScalar *ba, *bav;
2473   PetscInt           r, j, col, ncols, *bi, *bj;
2474   Mat                B = mat->B;
2475   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2476 
2477   PetscFunctionBegin;
2478   /* When a process holds entire A and other processes have no entry */
2479   if (A->cmap->N == n) {
2480     PetscCall(VecGetArrayWrite(v, &diagA));
2481     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2482     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2483     PetscCall(VecDestroy(&diagV));
2484     PetscCall(VecRestoreArrayWrite(v, &diagA));
2485     PetscFunctionReturn(PETSC_SUCCESS);
2486   } else if (n == 0) {
2487     if (m) {
2488       PetscCall(VecGetArrayWrite(v, &a));
2489       for (r = 0; r < m; r++) {
2490         a[r] = PETSC_MIN_REAL;
2491         if (idx) idx[r] = -1;
2492       }
2493       PetscCall(VecRestoreArrayWrite(v, &a));
2494     }
2495     PetscFunctionReturn(PETSC_SUCCESS);
2496   }
2497 
2498   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2499   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2500   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2501   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2502 
2503   /* Get offdiagIdx[] for implicit 0.0 */
2504   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2505   ba = bav;
2506   bi = b->i;
2507   bj = b->j;
2508   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2509   for (r = 0; r < m; r++) {
2510     ncols = bi[r + 1] - bi[r];
2511     if (ncols == A->cmap->N - n) { /* Brow is dense */
2512       offdiagA[r]   = *ba;
2513       offdiagIdx[r] = cmap[0];
2514     } else { /* Brow is sparse, so we already KNOW the maximum is 0.0 or higher (there is an implicit zero) */
2515       offdiagA[r] = 0.0;
2516 
2517       /* Find first hole in the cmap */
2518       for (j = 0; j < ncols; j++) {
2519         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2520         if (col > j && j < cstart) {
2521           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2522           break;
2523         } else if (col > j + n && j >= cstart) {
2524           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2525           break;
2526         }
2527       }
2528       if (j == ncols && ncols < A->cmap->N - n) {
2529         /* a hole is outside compressed Bcols */
2530         if (ncols == 0) {
2531           if (cstart) {
2532             offdiagIdx[r] = 0;
2533           } else offdiagIdx[r] = cend;
2534         } else { /* ncols > 0 */
2535           offdiagIdx[r] = cmap[ncols - 1] + 1;
2536           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2537         }
2538       }
2539     }
2540 
2541     for (j = 0; j < ncols; j++) {
2542       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2543         offdiagA[r]   = *ba;
2544         offdiagIdx[r] = cmap[*bj];
2545       }
2546       ba++;
2547       bj++;
2548     }
2549   }
2550 
2551   PetscCall(VecGetArrayWrite(v, &a));
2552   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2553   for (r = 0; r < m; ++r) {
2554     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2555       a[r] = diagA[r];
2556       if (idx) idx[r] = cstart + diagIdx[r];
2557     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2558       a[r] = diagA[r];
2559       if (idx) {
2560         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2561           idx[r] = cstart + diagIdx[r];
2562         } else idx[r] = offdiagIdx[r];
2563       }
2564     } else {
2565       a[r] = offdiagA[r];
2566       if (idx) idx[r] = offdiagIdx[r];
2567     }
2568   }
2569   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2570   PetscCall(VecRestoreArrayWrite(v, &a));
2571   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2572   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2573   PetscCall(VecDestroy(&diagV));
2574   PetscCall(VecDestroy(&offdiagV));
2575   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2576   PetscFunctionReturn(PETSC_SUCCESS);
2577 }
2578 
2579 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2580 {
2581   Mat *dummy;
2582 
2583   PetscFunctionBegin;
2584   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2585   *newmat = *dummy;
2586   PetscCall(PetscFree(dummy));
2587   PetscFunctionReturn(PETSC_SUCCESS);
2588 }
2589 
2590 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2591 {
2592   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2593 
2594   PetscFunctionBegin;
2595   PetscCall(MatInvertBlockDiagonal(a->A, values));
2596   A->factorerrortype = a->A->factorerrortype;
2597   PetscFunctionReturn(PETSC_SUCCESS);
2598 }
2599 
2600 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2601 {
2602   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2603 
2604   PetscFunctionBegin;
2605   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2606   PetscCall(MatSetRandom(aij->A, rctx));
2607   if (x->assembled) {
2608     PetscCall(MatSetRandom(aij->B, rctx));
2609   } else {
2610     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2611   }
2612   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2613   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2614   PetscFunctionReturn(PETSC_SUCCESS);
2615 }
2616 
2617 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2618 {
2619   PetscFunctionBegin;
2620   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2621   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2622   PetscFunctionReturn(PETSC_SUCCESS);
2623 }
2624 
2625 /*@
2626    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2627 
2628    Not Collective
2629 
2630    Input Parameter:
2631 .    A - the matrix
2632 
2633    Output Parameter:
2634 .    nz - the number of nonzeros
2635 
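 Example Usage:
   An illustrative sketch, assuming `A` is an assembled `MATMPIAIJ` created by the caller
.vb
   PetscCount nz;
   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
.ve
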
2636  Level: advanced
2637 
2638 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`
2639 @*/
2640 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2641 {
2642   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2643   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2644   PetscBool   isaij;
2645 
2646   PetscFunctionBegin;
2647   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2648   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2649   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2650   PetscFunctionReturn(PETSC_SUCCESS);
2651 }
2652 
2653 /*@
2654    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in `MatIncreaseOverlap()`
2655 
2656    Collective
2657 
2658    Input Parameters:
2659 +    A - the matrix
2660 -    sc - `PETSC_TRUE` indicates the scalable algorithm should be used (the default is not to use it)
2661 
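 Example Usage:
   An illustrative sketch, assuming `A` is a `MATMPIAIJ` created by the caller
.vb
   PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
.ve
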
2662  Level: advanced
2663 
2664 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`
2665 @*/
2666 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2667 {
2668   PetscFunctionBegin;
2669   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2670   PetscFunctionReturn(PETSC_SUCCESS);
2671 }
2672 
2673 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2674 {
2675   PetscBool sc = PETSC_FALSE, flg;
2676 
2677   PetscFunctionBegin;
2678   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2679   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
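  /* queried below as the runtime option -mat_increase_overlap_scalable */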
2680   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2681   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2682   PetscOptionsHeadEnd();
2683   PetscFunctionReturn(PETSC_SUCCESS);
2684 }
2685 
2686 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2687 {
2688   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2689   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2690 
2691   PetscFunctionBegin;
2692   if (!Y->preallocated) {
2693     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2694   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2695     PetscInt nonew = aij->nonew;
2696     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2697     aij->nonew = nonew;
2698   }
2699   PetscCall(MatShift_Basic(Y, a));
2700   PetscFunctionReturn(PETSC_SUCCESS);
2701 }
2702 
2703 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2704 {
2705   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2706 
2707   PetscFunctionBegin;
2708   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2709   PetscCall(MatMissingDiagonal(a->A, missing, d));
2710   if (d) {
2711     PetscInt rstart;
2712     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2713     *d += rstart;
2714   }
2715   PetscFunctionReturn(PETSC_SUCCESS);
2716 }
2717 
2718 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2719 {
2720   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2721 
2722   PetscFunctionBegin;
2723   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2724   PetscFunctionReturn(PETSC_SUCCESS);
2725 }
2726 
2727 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
2728 {
2729   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2730 
2731   PetscFunctionBegin;
2732   PetscCall(MatEliminateZeros(a->A));
2733   PetscCall(MatEliminateZeros(a->B));
2734   PetscFunctionReturn(PETSC_SUCCESS);
2735 }
2736 
2737 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2738                                        MatGetRow_MPIAIJ,
2739                                        MatRestoreRow_MPIAIJ,
2740                                        MatMult_MPIAIJ,
2741                                        /* 4*/ MatMultAdd_MPIAIJ,
2742                                        MatMultTranspose_MPIAIJ,
2743                                        MatMultTransposeAdd_MPIAIJ,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        /*10*/ NULL,
2748                                        NULL,
2749                                        NULL,
2750                                        MatSOR_MPIAIJ,
2751                                        MatTranspose_MPIAIJ,
2752                                        /*15*/ MatGetInfo_MPIAIJ,
2753                                        MatEqual_MPIAIJ,
2754                                        MatGetDiagonal_MPIAIJ,
2755                                        MatDiagonalScale_MPIAIJ,
2756                                        MatNorm_MPIAIJ,
2757                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2758                                        MatAssemblyEnd_MPIAIJ,
2759                                        MatSetOption_MPIAIJ,
2760                                        MatZeroEntries_MPIAIJ,
2761                                        /*24*/ MatZeroRows_MPIAIJ,
2762                                        NULL,
2763                                        NULL,
2764                                        NULL,
2765                                        NULL,
2766                                        /*29*/ MatSetUp_MPI_Hash,
2767                                        NULL,
2768                                        NULL,
2769                                        MatGetDiagonalBlock_MPIAIJ,
2770                                        NULL,
2771                                        /*34*/ MatDuplicate_MPIAIJ,
2772                                        NULL,
2773                                        NULL,
2774                                        NULL,
2775                                        NULL,
2776                                        /*39*/ MatAXPY_MPIAIJ,
2777                                        MatCreateSubMatrices_MPIAIJ,
2778                                        MatIncreaseOverlap_MPIAIJ,
2779                                        MatGetValues_MPIAIJ,
2780                                        MatCopy_MPIAIJ,
2781                                        /*44*/ MatGetRowMax_MPIAIJ,
2782                                        MatScale_MPIAIJ,
2783                                        MatShift_MPIAIJ,
2784                                        MatDiagonalSet_MPIAIJ,
2785                                        MatZeroRowsColumns_MPIAIJ,
2786                                        /*49*/ MatSetRandom_MPIAIJ,
2787                                        MatGetRowIJ_MPIAIJ,
2788                                        MatRestoreRowIJ_MPIAIJ,
2789                                        NULL,
2790                                        NULL,
2791                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2792                                        NULL,
2793                                        MatSetUnfactored_MPIAIJ,
2794                                        MatPermute_MPIAIJ,
2795                                        NULL,
2796                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2797                                        MatDestroy_MPIAIJ,
2798                                        MatView_MPIAIJ,
2799                                        NULL,
2800                                        NULL,
2801                                        /*64*/ NULL,
2802                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2803                                        NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2807                                        MatGetRowMinAbs_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        NULL,
2812                                        /*75*/ MatFDColoringApply_AIJ,
2813                                        MatSetFromOptions_MPIAIJ,
2814                                        NULL,
2815                                        NULL,
2816                                        MatFindZeroDiagonals_MPIAIJ,
2817                                        /*80*/ NULL,
2818                                        NULL,
2819                                        NULL,
2820                                        /*83*/ MatLoad_MPIAIJ,
2821                                        MatIsSymmetric_MPIAIJ,
2822                                        NULL,
2823                                        NULL,
2824                                        NULL,
2825                                        NULL,
2826                                        /*89*/ NULL,
2827                                        NULL,
2828                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2829                                        NULL,
2830                                        NULL,
2831                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2832                                        NULL,
2833                                        NULL,
2834                                        NULL,
2835                                        MatBindToCPU_MPIAIJ,
2836                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2837                                        NULL,
2838                                        NULL,
2839                                        MatConjugate_MPIAIJ,
2840                                        NULL,
2841                                        /*104*/ MatSetValuesRow_MPIAIJ,
2842                                        MatRealPart_MPIAIJ,
2843                                        MatImaginaryPart_MPIAIJ,
2844                                        NULL,
2845                                        NULL,
2846                                        /*109*/ NULL,
2847                                        NULL,
2848                                        MatGetRowMin_MPIAIJ,
2849                                        NULL,
2850                                        MatMissingDiagonal_MPIAIJ,
2851                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2852                                        NULL,
2853                                        MatGetGhosts_MPIAIJ,
2854                                        NULL,
2855                                        NULL,
2856                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2857                                        NULL,
2858                                        NULL,
2859                                        NULL,
2860                                        MatGetMultiProcBlock_MPIAIJ,
2861                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2862                                        MatGetColumnReductions_MPIAIJ,
2863                                        MatInvertBlockDiagonal_MPIAIJ,
2864                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2865                                        MatCreateSubMatricesMPI_MPIAIJ,
2866                                        /*129*/ NULL,
2867                                        NULL,
2868                                        NULL,
2869                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2870                                        NULL,
2871                                        /*134*/ NULL,
2872                                        NULL,
2873                                        NULL,
2874                                        NULL,
2875                                        NULL,
2876                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2877                                        NULL,
2878                                        NULL,
2879                                        MatFDColoringSetUp_MPIXAIJ,
2880                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2881                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2882                                        /*145*/ NULL,
2883                                        NULL,
2884                                        NULL,
2885                                        MatCreateGraph_Simple_AIJ,
2886                                        NULL,
2887                                        /*150*/ NULL,
2888                                        MatEliminateZeros_MPIAIJ};
2889 
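/*
   Illustrative usage sketch of the public MatStoreValues()/MatRetrieveValues() pair that dispatches to
   the two routines below, assuming `mat` is an assembled MATMPIAIJ whose nonzero pattern does not change:

     PetscCall(MatStoreValues(mat));
     ... overwrite the numerical values of mat, keeping the same nonzero pattern ...
     PetscCall(MatRetrieveValues(mat));
*/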
2890 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2891 {
2892   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2893 
2894   PetscFunctionBegin;
2895   PetscCall(MatStoreValues(aij->A));
2896   PetscCall(MatStoreValues(aij->B));
2897   PetscFunctionReturn(PETSC_SUCCESS);
2898 }
2899 
2900 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2901 {
2902   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2903 
2904   PetscFunctionBegin;
2905   PetscCall(MatRetrieveValues(aij->A));
2906   PetscCall(MatRetrieveValues(aij->B));
2907   PetscFunctionReturn(PETSC_SUCCESS);
2908 }
2909 
2910 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2911 {
2912   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2913   PetscMPIInt size;
2914 
2915   PetscFunctionBegin;
2916   if (B->hash_active) {
2917     PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
2918     B->hash_active = PETSC_FALSE;
2919   }
2920   PetscCall(PetscLayoutSetUp(B->rmap));
2921   PetscCall(PetscLayoutSetUp(B->cmap));
2922 
2923 #if defined(PETSC_USE_CTABLE)
2924   PetscCall(PetscHMapIDestroy(&b->colmap));
2925 #else
2926   PetscCall(PetscFree(b->colmap));
2927 #endif
2928   PetscCall(PetscFree(b->garray));
2929   PetscCall(VecDestroy(&b->lvec));
2930   PetscCall(VecScatterDestroy(&b->Mvctx));
2931 
2932   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2933   PetscCall(MatDestroy(&b->B));
2934   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
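  /* with more than one MPI process, b->B is created with the full global column count; once the matrix is
     assembled only the columns that actually hold nonzeros are referenced (through b->garray and b->colmap) */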
2935   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2936   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2937   PetscCall(MatSetType(b->B, MATSEQAIJ));
2938 
2939   PetscCall(MatDestroy(&b->A));
2940   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2941   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2942   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2943   PetscCall(MatSetType(b->A, MATSEQAIJ));
2944 
2945   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2946   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2947   B->preallocated  = PETSC_TRUE;
2948   B->was_assembled = PETSC_FALSE;
2949   B->assembled     = PETSC_FALSE;
2950   PetscFunctionReturn(PETSC_SUCCESS);
2951 }
2952 
2953 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2954 {
2955   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2956 
2957   PetscFunctionBegin;
2958   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2959   PetscCall(PetscLayoutSetUp(B->rmap));
2960   PetscCall(PetscLayoutSetUp(B->cmap));
2961 
2962 #if defined(PETSC_USE_CTABLE)
2963   PetscCall(PetscHMapIDestroy(&b->colmap));
2964 #else
2965   PetscCall(PetscFree(b->colmap));
2966 #endif
2967   PetscCall(PetscFree(b->garray));
2968   PetscCall(VecDestroy(&b->lvec));
2969   PetscCall(VecScatterDestroy(&b->Mvctx));
2970 
2971   PetscCall(MatResetPreallocation(b->A));
2972   PetscCall(MatResetPreallocation(b->B));
2973   B->preallocated  = PETSC_TRUE;
2974   B->was_assembled = PETSC_FALSE;
2975   B->assembled     = PETSC_FALSE;
2976   PetscFunctionReturn(PETSC_SUCCESS);
2977 }
2978 
2979 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2980 {
2981   Mat         mat;
2982   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2983 
2984   PetscFunctionBegin;
2985   *newmat = NULL;
2986   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2987   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2988   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2989   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2990   a = (Mat_MPIAIJ *)mat->data;
2991 
2992   mat->factortype   = matin->factortype;
2993   mat->assembled    = matin->assembled;
2994   mat->insertmode   = NOT_SET_VALUES;
2995   mat->preallocated = matin->preallocated;
2996 
2997   a->size         = oldmat->size;
2998   a->rank         = oldmat->rank;
2999   a->donotstash   = oldmat->donotstash;
3000   a->roworiented  = oldmat->roworiented;
3001   a->rowindices   = NULL;
3002   a->rowvalues    = NULL;
3003   a->getrowactive = PETSC_FALSE;
3004 
3005   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3006   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3007 
3008   if (oldmat->colmap) {
3009 #if defined(PETSC_USE_CTABLE)
3010     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3011 #else
3012     PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3013     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3014 #endif
3015   } else a->colmap = NULL;
3016   if (oldmat->garray) {
3017     PetscInt len;
3018     len = oldmat->B->cmap->n;
3019     PetscCall(PetscMalloc1(len + 1, &a->garray));
3020     if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3021   } else a->garray = NULL;
3022 
3023   /* It may happen that MatDuplicate() is called with a non-assembled matrix;
3024      in fact, MatDuplicate() only requires the matrix to be preallocated.
3025      This can happen, for instance, inside a DMCreateMatrix_Shell() */
3026   if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3027   if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3028   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3029   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3030   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3031   *newmat = mat;
3032   PetscFunctionReturn(PETSC_SUCCESS);
3033 }
3034 
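/*
   Illustrative usage sketch of the public MatLoad() that dispatches here, assuming "matrix.dat" (a
   hypothetical file name) was previously written with MatView() and a binary viewer:

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/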
3035 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3036 {
3037   PetscBool isbinary, ishdf5;
3038 
3039   PetscFunctionBegin;
3040   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3041   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3042   /* force binary viewer to load .info file if it has not yet done so */
3043   PetscCall(PetscViewerSetUp(viewer));
3044   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3045   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3046   if (isbinary) {
3047     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3048   } else if (ishdf5) {
3049 #if defined(PETSC_HAVE_HDF5)
3050     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3051 #else
3052     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3053 #endif
3054   } else {
3055     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3056   }
3057   PetscFunctionReturn(PETSC_SUCCESS);
3058 }
3059 
3060 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3061 {
3062   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3063   PetscInt    *rowidxs, *colidxs;
3064   PetscScalar *matvals;
3065 
3066   PetscFunctionBegin;
3067   PetscCall(PetscViewerSetUp(viewer));
3068 
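  /* The binary file is read in the order below:
       header[4] = { MAT_FILE_CLASSID, M, N, nz }     (PETSC_INT)
       row lengths: M values, m of them on this rank  (PETSC_INT)
       column indices: nz values, global numbering    (PETSC_INT)
       matrix values: nz values                       (PETSC_SCALAR)
  */
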
3069   /* read in matrix header */
3070   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3071   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3072   M  = header[1];
3073   N  = header[2];
3074   nz = header[3];
3075   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3076   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3077   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3078 
3079   /* set block sizes from the viewer's .info file */
3080   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3081   /* set global sizes if not set already */
3082   if (mat->rmap->N < 0) mat->rmap->N = M;
3083   if (mat->cmap->N < 0) mat->cmap->N = N;
3084   PetscCall(PetscLayoutSetUp(mat->rmap));
3085   PetscCall(PetscLayoutSetUp(mat->cmap));
3086 
3087   /* check if the matrix sizes are correct */
3088   PetscCall(MatGetSize(mat, &rows, &cols));
3089   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3090 
3091   /* read in row lengths and build row indices */
3092   PetscCall(MatGetLocalSize(mat, &m, NULL));
3093   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3094   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3095   rowidxs[0] = 0;
3096   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3097   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3098   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3099   /* read in column indices and matrix values */
3100   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3101   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3102   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3103   /* store matrix indices and values */
3104   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3105   PetscCall(PetscFree(rowidxs));
3106   PetscCall(PetscFree2(colidxs, matvals));
3107   PetscFunctionReturn(PETSC_SUCCESS);
3108 }
3109 
3110 /* Not scalable because of ISAllGather() unless getting all columns. */
3111 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3112 {
3113   IS          iscol_local;
3114   PetscBool   isstride;
3115   PetscMPIInt lisstride = 0, gisstride;
3116 
3117   PetscFunctionBegin;
3118   /* check if we are grabbing all columns */
3119   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3120 
3121   if (isstride) {
3122     PetscInt start, len, mstart, mlen;
3123     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3124     PetscCall(ISGetLocalSize(iscol, &len));
3125     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3126     if (mstart == start && mlen - mstart == len) lisstride = 1;
3127   }
3128 
3129   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3130   if (gisstride) {
3131     PetscInt N;
3132     PetscCall(MatGetSize(mat, NULL, &N));
3133     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3134     PetscCall(ISSetIdentity(iscol_local));
3135     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3136   } else {
3137     PetscInt cbs;
3138     PetscCall(ISGetBlockSize(iscol, &cbs));
3139     PetscCall(ISAllGather(iscol, &iscol_local));
3140     PetscCall(ISSetBlockSize(iscol_local, cbs));
3141   }
3142 
3143   *isseq = iscol_local;
3144   PetscFunctionReturn(PETSC_SUCCESS);
3145 }
3146 
3147 /*
3148  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3149  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3150 
3151  Input Parameters:
3152 +   mat - matrix
3153 +   isrow - parallel row index set; its local indices are a subset of the local rows of `mat`,
3154            i.e., mat->rstart <= isrow[i] < mat->rend
3155 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3156            i.e., mat->cstart <= iscol[i] < mat->cend
3157 
3158  Output Parameters:
3159 +   isrow_d - sequential row index set for retrieving mat->A
3160 .   iscol_d - sequential column index set for retrieving mat->A
3161 .   iscol_o - sequential column index set for retrieving mat->B
3162 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3163  */
3164 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3165 {
3166   Vec             x, cmap;
3167   const PetscInt *is_idx;
3168   PetscScalar    *xarray, *cmaparray;
3169   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3170   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3171   Mat             B    = a->B;
3172   Vec             lvec = a->lvec, lcmap;
3173   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3174   MPI_Comm        comm;
3175   VecScatter      Mvctx = a->Mvctx;
3176 
3177   PetscFunctionBegin;
3178   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3179   PetscCall(ISGetLocalSize(iscol, &ncols));
3180 
3181   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3182   PetscCall(MatCreateVecs(mat, &x, NULL));
3183   PetscCall(VecSet(x, -1.0));
3184   PetscCall(VecDuplicate(x, &cmap));
3185   PetscCall(VecSet(cmap, -1.0));
3186 
3187   /* Get start indices */
3188   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3189   isstart -= ncols;
3190   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3191 
3192   PetscCall(ISGetIndices(iscol, &is_idx));
3193   PetscCall(VecGetArray(x, &xarray));
3194   PetscCall(VecGetArray(cmap, &cmaparray));
3195   PetscCall(PetscMalloc1(ncols, &idx));
3196   for (i = 0; i < ncols; i++) {
3197     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3198     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3199     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3200   }
3201   PetscCall(VecRestoreArray(x, &xarray));
3202   PetscCall(VecRestoreArray(cmap, &cmaparray));
3203   PetscCall(ISRestoreIndices(iscol, &is_idx));
3204 
3205   /* Get iscol_d */
3206   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3207   PetscCall(ISGetBlockSize(iscol, &i));
3208   PetscCall(ISSetBlockSize(*iscol_d, i));
3209 
3210   /* Get isrow_d */
3211   PetscCall(ISGetLocalSize(isrow, &m));
3212   rstart = mat->rmap->rstart;
3213   PetscCall(PetscMalloc1(m, &idx));
3214   PetscCall(ISGetIndices(isrow, &is_idx));
3215   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3216   PetscCall(ISRestoreIndices(isrow, &is_idx));
3217 
3218   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3219   PetscCall(ISGetBlockSize(isrow, &i));
3220   PetscCall(ISSetBlockSize(*isrow_d, i));
3221 
3222   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3223   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3224   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3225 
3226   PetscCall(VecDuplicate(lvec, &lcmap));
3227 
3228   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3229   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3230 
3231   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3232   /* off-process column indices */
3233   count = 0;
3234   PetscCall(PetscMalloc1(Bn, &idx));
3235   PetscCall(PetscMalloc1(Bn, &cmap1));
3236 
3237   PetscCall(VecGetArray(lvec, &xarray));
3238   PetscCall(VecGetArray(lcmap, &cmaparray));
3239   for (i = 0; i < Bn; i++) {
3240     if (PetscRealPart(xarray[i]) > -1.0) {
3241       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3242       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3243       count++;
3244     }
3245   }
3246   PetscCall(VecRestoreArray(lvec, &xarray));
3247   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3248 
3249   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3250   /* cannot ensure iscol_o has same blocksize as iscol! */
3251 
3252   PetscCall(PetscFree(idx));
3253   *garray = cmap1;
3254 
3255   PetscCall(VecDestroy(&x));
3256   PetscCall(VecDestroy(&cmap));
3257   PetscCall(VecDestroy(&lcmap));
3258   PetscFunctionReturn(PETSC_SUCCESS);
3259 }
3260 
3261 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3262 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3263 {
3264   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3265   Mat         M = NULL;
3266   MPI_Comm    comm;
3267   IS          iscol_d, isrow_d, iscol_o;
3268   Mat         Asub = NULL, Bsub = NULL;
3269   PetscInt    n;
3270 
3271   PetscFunctionBegin;
3272   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3273 
3274   if (call == MAT_REUSE_MATRIX) {
3275     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3276     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3277     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3278 
3279     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3280     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3281 
3282     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3283     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3284 
3285     /* Update diagonal and off-diagonal portions of submat */
3286     asub = (Mat_MPIAIJ *)(*submat)->data;
3287     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3288     PetscCall(ISGetLocalSize(iscol_o, &n));
3289     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3290     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3291     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3292 
3293   } else { /* call == MAT_INITIAL_MATRIX) */
3294     const PetscInt *garray;
3295     PetscInt        BsubN;
3296 
3297     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3298     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3299 
3300     /* Create local submatrices Asub and Bsub */
3301     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3302     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3303 
3304     /* Create submatrix M */
3305     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3306 
3307     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3308     asub = (Mat_MPIAIJ *)M->data;
3309 
3310     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3311     n = asub->B->cmap->N;
3312     if (BsubN > n) {
3313       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3314       const PetscInt *idx;
3315       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3316       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3317 
3318       PetscCall(PetscMalloc1(n, &idx_new));
3319       j = 0;
3320       PetscCall(ISGetIndices(iscol_o, &idx));
3321       for (i = 0; i < n; i++) {
3322         if (j >= BsubN) break;
3323         while (subgarray[i] > garray[j]) j++;
3324 
3325         if (subgarray[i] == garray[j]) {
3326           idx_new[i] = idx[j++];
3327         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3328       }
3329       PetscCall(ISRestoreIndices(iscol_o, &idx));
3330 
3331       PetscCall(ISDestroy(&iscol_o));
3332       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3333 
3334     } else if (BsubN < n) {
3335       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3336     }
3337 
3338     PetscCall(PetscFree(garray));
3339     *submat = M;
3340 
3341     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3342     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3343     PetscCall(ISDestroy(&isrow_d));
3344 
3345     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3346     PetscCall(ISDestroy(&iscol_d));
3347 
3348     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3349     PetscCall(ISDestroy(&iscol_o));
3350   }
3351   PetscFunctionReturn(PETSC_SUCCESS);
3352 }
3353 
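/*
   Illustrative sketch of driving this routine through the public MatCreateSubMatrix(), assuming `mat`
   is a MATMPIAIJ and `isrow`/`iscol` are index sets built by the caller:

     Mat sub;
     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &sub));
     ... later, refresh sub from updated values of mat using the same index sets ...
     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_REUSE_MATRIX, &sub));
     PetscCall(MatDestroy(&sub));
*/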
3354 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3355 {
3356   IS        iscol_local = NULL, isrow_d;
3357   PetscInt  csize;
3358   PetscInt  n, i, j, start, end;
3359   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3360   MPI_Comm  comm;
3361 
3362   PetscFunctionBegin;
3363   /* If isrow has the same processor distribution as mat,
3364      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
3365   if (call == MAT_REUSE_MATRIX) {
3366     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3367     if (isrow_d) {
3368       sameRowDist  = PETSC_TRUE;
3369       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3370     } else {
3371       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3372       if (iscol_local) {
3373         sameRowDist  = PETSC_TRUE;
3374         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3375       }
3376     }
3377   } else {
3378     /* Check if isrow has same processor distribution as mat */
3379     sameDist[0] = PETSC_FALSE;
3380     PetscCall(ISGetLocalSize(isrow, &n));
3381     if (!n) {
3382       sameDist[0] = PETSC_TRUE;
3383     } else {
3384       PetscCall(ISGetMinMax(isrow, &i, &j));
3385       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3386       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3387     }
3388 
3389     /* Check if iscol has same processor distribution as mat */
3390     sameDist[1] = PETSC_FALSE;
3391     PetscCall(ISGetLocalSize(iscol, &n));
3392     if (!n) {
3393       sameDist[1] = PETSC_TRUE;
3394     } else {
3395       PetscCall(ISGetMinMax(iscol, &i, &j));
3396       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3397       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3398     }
3399 
3400     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3401     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3402     sameRowDist = tsameDist[0];
3403   }
3404 
3405   if (sameRowDist) {
3406     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3407       /* isrow and iscol have same processor distribution as mat */
3408       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3409       PetscFunctionReturn(PETSC_SUCCESS);
3410     } else { /* sameRowDist */
3411       /* isrow has same processor distribution as mat */
3412       if (call == MAT_INITIAL_MATRIX) {
3413         PetscBool sorted;
3414         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3415         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3416         PetscCall(ISGetSize(iscol, &i));
3417         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3418 
3419         PetscCall(ISSorted(iscol_local, &sorted));
3420         if (sorted) {
3421           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3422           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3423           PetscFunctionReturn(PETSC_SUCCESS);
3424         }
3425       } else { /* call == MAT_REUSE_MATRIX */
3426         IS iscol_sub;
3427         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3428         if (iscol_sub) {
3429           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3430           PetscFunctionReturn(PETSC_SUCCESS);
3431         }
3432       }
3433     }
3434   }
3435 
3436   /* General case: iscol -> iscol_local which has global size of iscol */
3437   if (call == MAT_REUSE_MATRIX) {
3438     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3439     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3440   } else {
3441     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3442   }
3443 
3444   PetscCall(ISGetLocalSize(iscol, &csize));
3445   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3446 
3447   if (call == MAT_INITIAL_MATRIX) {
3448     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3449     PetscCall(ISDestroy(&iscol_local));
3450   }
3451   PetscFunctionReturn(PETSC_SUCCESS);
3452 }
3453 
3454 /*@C
3455      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3456          and "off-diagonal" part of the matrix in CSR format.
3457 
3458    Collective
3459 
3460    Input Parameters:
3461 +  comm - MPI communicator
3462 .  A - "diagonal" portion of matrix
3463 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3464 -  garray - global index of `B` columns
3465 
3466    Output Parameter:
3467 .   mat - the matrix, with input `A` as its local diagonal matrix
3468 
3469   Level: advanced
3470 
3471    Notes:
3472    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3473 
3474    `A` becomes part of the output `mat` and `B` is destroyed by this routine, so the caller must not use `A` or `B` afterwards.
3475 
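   Example Usage:
   An illustrative sketch, assuming `Aloc` (diagonal block) and `Bloc` (off-diagonal block) are `MATSEQAIJ`
   matrices already built by the caller and `garray` holds the global column indices of `Bloc`'s columns
.vb
   Mat C;
   PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, Aloc, Bloc, garray, &C));
   // Aloc and Bloc now belong to C and must not be used or destroyed by the caller
.ve
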
3476 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3477 @*/
3478 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3479 {
3480   Mat_MPIAIJ        *maij;
3481   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3482   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3483   const PetscScalar *oa;
3484   Mat                Bnew;
3485   PetscInt           m, n, N;
3486   MatType            mpi_mat_type;
3487 
3488   PetscFunctionBegin;
3489   PetscCall(MatCreate(comm, mat));
3490   PetscCall(MatGetSize(A, &m, &n));
3491   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3492   PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3493   /* the check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3494   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3495 
3496   /* Get global columns of mat */
3497   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3498 
3499   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3500   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3501   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3502   PetscCall(MatSetType(*mat, mpi_mat_type));
3503 
3504   PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3505   maij = (Mat_MPIAIJ *)(*mat)->data;
3506 
3507   (*mat)->preallocated = PETSC_TRUE;
3508 
3509   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3510   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3511 
3512   /* Set A as diagonal portion of *mat */
3513   maij->A = A;
3514 
3515   nz = oi[m];
3516   for (i = 0; i < nz; i++) {
3517     col   = oj[i];
3518     oj[i] = garray[col];
3519   }
3520 
3521   /* Set Bnew as off-diagonal portion of *mat */
3522   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3523   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3524   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3525   bnew        = (Mat_SeqAIJ *)Bnew->data;
3526   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3527   maij->B     = Bnew;
3528 
3529   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3530 
3531   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3532   b->free_a       = PETSC_FALSE;
3533   b->free_ij      = PETSC_FALSE;
3534   PetscCall(MatDestroy(&B));
3535 
3536   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3537   bnew->free_a       = PETSC_TRUE;
3538   bnew->free_ij      = PETSC_TRUE;
3539 
3540   /* condense columns of maij->B */
3541   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3542   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3543   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3544   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3545   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3546   PetscFunctionReturn(PETSC_SUCCESS);
3547 }
3548 
3549 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3550 
3551 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3552 {
3553   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3554   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3555   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3556   Mat             M, Msub, B = a->B;
3557   MatScalar      *aa;
3558   Mat_SeqAIJ     *aij;
3559   PetscInt       *garray = a->garray, *colsub, Ncols;
3560   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3561   IS              iscol_sub, iscmap;
3562   const PetscInt *is_idx, *cmap;
3563   PetscBool       allcolumns = PETSC_FALSE;
3564   MPI_Comm        comm;
3565 
3566   PetscFunctionBegin;
3567   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3568   if (call == MAT_REUSE_MATRIX) {
3569     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3570     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3571     PetscCall(ISGetLocalSize(iscol_sub, &count));
3572 
3573     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3574     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3575 
3576     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3577     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3578 
3579     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3580 
3581   } else { /* call == MAT_INITIAL_MATRIX) */
3582     PetscBool flg;
3583 
3584     PetscCall(ISGetLocalSize(iscol, &n));
3585     PetscCall(ISGetSize(iscol, &Ncols));
3586 
3587     /* (1) iscol -> nonscalable iscol_local */
3588     /* Check for special case: each processor gets entire matrix columns */
3589     PetscCall(ISIdentity(iscol_local, &flg));
3590     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3591     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3592     if (allcolumns) {
3593       iscol_sub = iscol_local;
3594       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3595       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3596 
3597     } else {
3598       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3599       PetscInt *idx, *cmap1, k;
3600       PetscCall(PetscMalloc1(Ncols, &idx));
3601       PetscCall(PetscMalloc1(Ncols, &cmap1));
3602       PetscCall(ISGetIndices(iscol_local, &is_idx));
3603       count = 0;
3604       k     = 0;
3605       for (i = 0; i < Ncols; i++) {
3606         j = is_idx[i];
3607         if (j >= cstart && j < cend) {
3608           /* diagonal part of mat */
3609           idx[count]     = j;
3610           cmap1[count++] = i; /* column index in submat */
3611         } else if (Bn) {
3612           /* off-diagonal part of mat */
3613           if (j == garray[k]) {
3614             idx[count]     = j;
3615             cmap1[count++] = i; /* column index in submat */
3616           } else if (j > garray[k]) {
3617             while (j > garray[k] && k < Bn - 1) k++;
3618             if (j == garray[k]) {
3619               idx[count]     = j;
3620               cmap1[count++] = i; /* column index in submat */
3621             }
3622           }
3623         }
3624       }
3625       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3626 
3627       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3628       PetscCall(ISGetBlockSize(iscol, &cbs));
3629       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3630 
3631       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3632     }
3633 
3634     /* (3) Create sequential Msub */
3635     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3636   }
3637 
3638   PetscCall(ISGetLocalSize(iscol_sub, &count));
3639   aij = (Mat_SeqAIJ *)(Msub)->data;
3640   ii  = aij->i;
3641   PetscCall(ISGetIndices(iscmap, &cmap));
3642 
3643   /*
3644       m - number of local rows
3645       Ncols - number of columns (same on all processors)
3646       rstart - first row in new global matrix generated
3647   */
3648   PetscCall(MatGetSize(Msub, &m, NULL));
3649 
3650   if (call == MAT_INITIAL_MATRIX) {
3651     /* (4) Create parallel newmat */
3652     PetscMPIInt rank, size;
3653     PetscInt    csize;
3654 
3655     PetscCallMPI(MPI_Comm_size(comm, &size));
3656     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3657 
3658     /*
3659         Determine the number of non-zeros in the diagonal and off-diagonal
3660         portions of the matrix in order to do correct preallocation
3661     */
3662 
3663     /* first get start and end of "diagonal" columns */
3664     PetscCall(ISGetLocalSize(iscol, &csize));
3665     if (csize == PETSC_DECIDE) {
3666       PetscCall(ISGetSize(isrow, &mglobal));
3667       if (mglobal == Ncols) { /* square matrix */
3668         nlocal = m;
3669       } else {
3670         nlocal = Ncols / size + ((Ncols % size) > rank);
3671       }
3672     } else {
3673       nlocal = csize;
3674     }
3675     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3676     rstart = rend - nlocal;
3677     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3678 
3679     /* next, compute all the lengths */
3680     jj = aij->j;
3681     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3682     olens = dlens + m;
3683     for (i = 0; i < m; i++) {
3684       jend = ii[i + 1] - ii[i];
3685       olen = 0;
3686       dlen = 0;
3687       for (j = 0; j < jend; j++) {
3688         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3689         else dlen++;
3690         jj++;
3691       }
3692       olens[i] = olen;
3693       dlens[i] = dlen;
3694     }
3695 
3696     PetscCall(ISGetBlockSize(isrow, &bs));
3697     PetscCall(ISGetBlockSize(iscol, &cbs));
3698 
3699     PetscCall(MatCreate(comm, &M));
3700     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3701     PetscCall(MatSetBlockSizes(M, bs, cbs));
3702     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3703     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3704     PetscCall(PetscFree(dlens));
3705 
3706   } else { /* call == MAT_REUSE_MATRIX */
3707     M = *newmat;
3708     PetscCall(MatGetLocalSize(M, &i, NULL));
3709     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3710     PetscCall(MatZeroEntries(M));
3711     /*
3712          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3713        rather than the slower MatSetValues().
3714     */
3715     M->was_assembled = PETSC_TRUE;
3716     M->assembled     = PETSC_FALSE;
3717   }
3718 
3719   /* (5) Set values of Msub to *newmat */
3720   PetscCall(PetscMalloc1(count, &colsub));
3721   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3722 
3723   jj = aij->j;
3724   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3725   for (i = 0; i < m; i++) {
3726     row = rstart + i;
3727     nz  = ii[i + 1] - ii[i];
3728     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3729     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3730     jj += nz;
3731     aa += nz;
3732   }
3733   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3734   PetscCall(ISRestoreIndices(iscmap, &cmap));
3735 
3736   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3737   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3738 
3739   PetscCall(PetscFree(colsub));
3740 
3741   /* save Msub, iscol_sub and iscmap used in processor for next request */
3742   if (call == MAT_INITIAL_MATRIX) {
3743     *newmat = M;
3744     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3745     PetscCall(MatDestroy(&Msub));
3746 
3747     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3748     PetscCall(ISDestroy(&iscol_sub));
3749 
3750     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3751     PetscCall(ISDestroy(&iscmap));
3752 
3753     if (iscol_local) {
3754       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3755       PetscCall(ISDestroy(&iscol_local));
3756     }
3757   }
3758   PetscFunctionReturn(PETSC_SUCCESS);
3759 }
3760 
3761 /*
3762     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
3763   on each process, and then the end result by concatenating the local matrices.
3764   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3765 
3766   This requires a sequential iscol with all indices.
3767 */
3768 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3769 {
3770   PetscMPIInt rank, size;
3771   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3772   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3773   Mat         M, Mreuse;
3774   MatScalar  *aa, *vwork;
3775   MPI_Comm    comm;
3776   Mat_SeqAIJ *aij;
3777   PetscBool   colflag, allcolumns = PETSC_FALSE;
3778 
3779   PetscFunctionBegin;
3780   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3781   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3782   PetscCallMPI(MPI_Comm_size(comm, &size));
3783 
3784   /* Check for special case: each processor gets entire matrix columns */
3785   PetscCall(ISIdentity(iscol, &colflag));
3786   PetscCall(ISGetLocalSize(iscol, &n));
3787   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3788   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3789 
3790   if (call == MAT_REUSE_MATRIX) {
3791     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3792     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3793     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3794   } else {
3795     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3796   }
3797 
3798   /*
3799       m - number of local rows
3800       n - number of columns (same on all processors)
3801       rstart - first row in new global matrix generated
3802   */
3803   PetscCall(MatGetSize(Mreuse, &m, &n));
3804   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3805   if (call == MAT_INITIAL_MATRIX) {
3806     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3807     ii  = aij->i;
3808     jj  = aij->j;
3809 
3810     /*
3811         Determine the number of non-zeros in the diagonal and off-diagonal
3812         portions of the matrix in order to do correct preallocation
3813     */
3814 
3815     /* first get start and end of "diagonal" columns */
3816     if (csize == PETSC_DECIDE) {
3817       PetscCall(ISGetSize(isrow, &mglobal));
3818       if (mglobal == n) { /* square matrix */
3819         nlocal = m;
3820       } else {
3821         nlocal = n / size + ((n % size) > rank);
3822       }
3823     } else {
3824       nlocal = csize;
3825     }
3826     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3827     rstart = rend - nlocal;
3828     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3829 
3830     /* next, compute all the lengths */
3831     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3832     olens = dlens + m;
3833     for (i = 0; i < m; i++) {
3834       jend = ii[i + 1] - ii[i];
3835       olen = 0;
3836       dlen = 0;
3837       for (j = 0; j < jend; j++) {
3838         if (*jj < rstart || *jj >= rend) olen++;
3839         else dlen++;
3840         jj++;
3841       }
3842       olens[i] = olen;
3843       dlens[i] = dlen;
3844     }
3845     PetscCall(MatCreate(comm, &M));
3846     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3847     PetscCall(MatSetBlockSizes(M, bs, cbs));
3848     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3849     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3850     PetscCall(PetscFree(dlens));
3851   } else {
3852     PetscInt ml, nl;
3853 
3854     M = *newmat;
3855     PetscCall(MatGetLocalSize(M, &ml, &nl));
3856     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3857     PetscCall(MatZeroEntries(M));
3858     /*
3859          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3860        rather than the slower MatSetValues().
3861     */
3862     M->was_assembled = PETSC_TRUE;
3863     M->assembled     = PETSC_FALSE;
3864   }
3865   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3866   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3867   ii  = aij->i;
3868   jj  = aij->j;
3869 
3870   /* trigger copy to CPU if needed */
3871   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3872   for (i = 0; i < m; i++) {
3873     row   = rstart + i;
3874     nz    = ii[i + 1] - ii[i];
3875     cwork = jj;
3876     jj += nz;
3877     vwork = aa;
3878     aa += nz;
3879     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3880   }
3881   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3882 
3883   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3884   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3885   *newmat = M;
3886 
3887   /* save submatrix used in processor for next request */
3888   if (call == MAT_INITIAL_MATRIX) {
3889     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3890     PetscCall(MatDestroy(&Mreuse));
3891   }
3892   PetscFunctionReturn(PETSC_SUCCESS);
3893 }
3894 
3895 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3896 {
3897   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3898   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3899   const PetscInt *JJ;
3900   PetscBool       nooffprocentries;
3901   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3902 
3903   PetscFunctionBegin;
3904   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3905 
3906   PetscCall(PetscLayoutSetUp(B->rmap));
3907   PetscCall(PetscLayoutSetUp(B->cmap));
3908   m      = B->rmap->n;
3909   cstart = B->cmap->rstart;
3910   cend   = B->cmap->rend;
3911   rstart = B->rmap->rstart;
3912 
3913   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3914 
3915   if (PetscDefined(USE_DEBUG)) {
3916     for (i = 0; i < m; i++) {
3917       nnz = Ii[i + 1] - Ii[i];
3918       JJ  = J + Ii[i];
3919       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3920       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3921       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3922     }
3923   }
3924 
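  /* count, for each local row, how many column indices fall inside the diagonal block
     [cstart, cend) (d_nnz) and how many fall outside it (o_nnz) */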
3925   for (i = 0; i < m; i++) {
3926     nnz     = Ii[i + 1] - Ii[i];
3927     JJ      = J + Ii[i];
3928     nnz_max = PetscMax(nnz_max, nnz);
3929     d       = 0;
3930     for (j = 0; j < nnz; j++) {
3931       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3932     }
3933     d_nnz[i] = d;
3934     o_nnz[i] = nnz - d;
3935   }
3936   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3937   PetscCall(PetscFree2(d_nnz, o_nnz));
3938 
3939   for (i = 0; i < m; i++) {
3940     ii = i + rstart;
3941     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3942   }
3943   nooffprocentries    = B->nooffprocentries;
3944   B->nooffprocentries = PETSC_TRUE;
3945   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3946   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3947   B->nooffprocentries = nooffprocentries;
3948 
3949   /* count number of entries below block diagonal */
3950   PetscCall(PetscFree(Aij->ld));
3951   PetscCall(PetscCalloc1(m, &ld));
3952   Aij->ld = ld;
3953   for (i = 0; i < m; i++) {
3954     nnz = Ii[i + 1] - Ii[i];
3955     j   = 0;
3956     while (j < nnz && J[j] < cstart) j++;
3957     ld[i] = j;
3958     J += nnz;
3959   }
3960 
3961   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3962   PetscFunctionReturn(PETSC_SUCCESS);
3963 }
3964 
3965 /*@
3966    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3967    (the default parallel PETSc format).
3968 
3969    Collective
3970 
3971    Input Parameters:
3972 +  B - the matrix
3973 .  i - the indices into j for the start of each local row (starts with zero)
3974 .  j - the column indices for each local row (starts with zero)
3975 -  v - optional values in the matrix
3976 
3977    Level: developer
3978 
3979    Notes:
3980        The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3981      thus you CANNOT change the matrix entries by changing the values of `v` after you have
3982      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3983 
3984        The `i` and `j` indices are 0 based, and the `i` entries are offsets into the local `j` array.
3985 
3986        The format used for the sparse matrix input is equivalent to a
3987     row-major ordering, i.e., for the following matrix, the input data expected is
3988     as shown below
3989 
3990 .vb
3991         1 0 0
3992         2 0 3     P0
3993        -------
3994         4 5 6     P1
3995 
3996      Process0 [P0] rows_owned=[0,1]
3997         i =  {0,1,3}  [size = nrow+1  = 2+1]
3998         j =  {0,0,2}  [size = 3]
3999         v =  {1,2,3}  [size = 3]
4000 
4001      Process1 [P1] rows_owned=[2]
4002         i =  {0,3}    [size = nrow+1  = 1+1]
4003         j =  {0,1,2}  [size = 3]
4004         v =  {4,5,6}  [size = 3]
4005 .ve
4006 
4007 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
4008           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
4009 @*/
4010 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4011 {
4012   PetscFunctionBegin;
4013   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4014   PetscFunctionReturn(PETSC_SUCCESS);
4015 }
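
/*
   Example usage of MatMPIAIJSetPreallocationCSR() (an illustrative sketch, not part of the
   library): it assembles the 3x3 example from the manual page above on two MPI ranks. The
   variable names and the fixed two-rank layout are assumptions made only for this sketch.

     Mat         A;
     PetscMPIInt rank;
     PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2};   // rank 0 owns rows 0 and 1
     PetscScalar v0[] = {1.0, 2.0, 3.0};
     PetscInt    i1[] = {0, 3},    j1[] = {0, 1, 2};   // rank 1 owns row 2
     PetscScalar v1[] = {4.0, 5.0, 6.0};

     PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, rank ? 1 : 2, PETSC_DECIDE, 3, 3));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatMPIAIJSetPreallocationCSR(A, rank ? i1 : i0, rank ? j1 : j0, rank ? v1 : v0));
     // the routine copies the arrays and assembles A; no further MatAssemblyBegin()/End() is needed
*/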
4016 
4017 /*@C
4018    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4019    (the default parallel PETSc format).  For good matrix assembly performance
4020    the user should preallocate the matrix storage by setting the parameters
4021    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4022 
4023    Collective
4024 
4025    Input Parameters:
4026 +  B - the matrix
4027 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4028            (same value is used for all local rows)
4029 .  d_nnz - array containing the number of nonzeros in the various rows of the
4030            DIAGONAL portion of the local submatrix (possibly different for each row)
4031            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4032            The size of this array is equal to the number of local rows, i.e 'm'.
4033            For matrices that will be factored, you must leave room for (and set)
4034            the diagonal entry even if it is zero.
4035 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4036            submatrix (same value is used for all local rows).
4037 -  o_nnz - array containing the number of nonzeros in the various rows of the
4038            OFF-DIAGONAL portion of the local submatrix (possibly different for
4039            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4040            structure. The size of this array is equal to the number
4041            of local rows, i.e 'm'.
4042 
4043    Usage:
4044    Consider the following 8x8 matrix with 34 non-zero values, that is
4045    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4046    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4047    as follows
4048 
4049 .vb
4050             1  2  0  |  0  3  0  |  0  4
4051     Proc0   0  5  6  |  7  0  0  |  8  0
4052             9  0 10  | 11  0  0  | 12  0
4053     -------------------------------------
4054            13  0 14  | 15 16 17  |  0  0
4055     Proc1   0 18  0  | 19 20 21  |  0  0
4056             0  0  0  | 22 23  0  | 24  0
4057     -------------------------------------
4058     Proc2  25 26 27  |  0  0 28  | 29  0
4059            30  0  0  | 31 32 33  |  0 34
4060 .ve
4061 
4062    This can be represented as a collection of submatrices as
4063 .vb
4064       A B C
4065       D E F
4066       G H I
4067 .ve
4068 
4069    Where the submatrices A,B,C are owned by proc0, D,E,F are
4070    owned by proc1, G,H,I are owned by proc2.
4071 
4072    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4073    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4074    The 'M','N' parameters are 8,8, and have the same values on all procs.
4075 
4076    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4077    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4078    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4079    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4080    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4081    matrix, and [DF] as another `MATSEQAIJ` matrix.
4082 
4083    When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4084    allocated for every row of the local diagonal submatrix, and `o_nz`
4085    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4086    One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over the local
4087    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4088    In this case, the values of `d_nz`, `o_nz` are
4089 .vb
4090      proc0  dnz = 2, o_nz = 2
4091      proc1  dnz = 3, o_nz = 2
4092      proc2  dnz = 1, o_nz = 4
4093 .ve
4094    We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4095    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4096    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4097    34 values.
4098 
4099    When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4100    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4101    In the above case the values for `d_nnz`, `o_nnz` are
4102 .vb
4103      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4104      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4105      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4106 .ve
4107    Here the space allocated is the sum of all the above values, i.e., 34, and
4108    hence pre-allocation is perfect.
4109 
4110    Level: intermediate
4111 
4112    Notes:
4113    If the *_nnz parameter is given then the *_nz parameter is ignored
4114 
4115    The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4116    storage.  The stored row and column indices begin with zero.
4117    See [Sparse Matrices](sec_matsparse) for details.
4118 
4119    The parallel matrix is partitioned such that the first m0 rows belong to
4120    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4121    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4122 
4123    The DIAGONAL portion of the local submatrix of a processor can be defined
4124    as the submatrix which is obtained by extracting the part corresponding to
4125    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4126    first row that belongs to the processor, r2 is the last row belonging to
4127    this processor, and c1-c2 is the range of indices of the local part of a
4128    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4129    common case of a square matrix, the row and column ranges are the same and
4130    the DIAGONAL part is also square. The remaining portion of the local
4131    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4132 
4133    If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4134 
4135    You can call `MatGetInfo()` to get information on how effective the preallocation was;
4136    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4137    You can also run with the option `-info` and look for messages with the string
4138    malloc in them to see if additional memory allocation was needed.
4139 
4140 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4141           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4142 @*/
4143 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4144 {
4145   PetscFunctionBegin;
4146   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4147   PetscValidType(B, 1);
4148   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4149   PetscFunctionReturn(PETSC_SUCCESS);
4150 }
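
/*
   Example usage of MatMPIAIJSetPreallocation() (an illustrative sketch, not part of the
   library): the preallocation that rank 1 (rows 3-5) of the 8x8 example above would perform,
   using the per-row d_nnz/o_nnz counts listed in the manual page. Variable names are assumptions.

     Mat      A;
     PetscInt d_nnz[] = {3, 3, 2}, o_nnz[] = {2, 1, 1};   // counts for local rows 0..2 on rank 1

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, 3, 3, 8, 8));                // m = n = 3 on this rank, M = N = 8
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz));
     // ... MatSetValues() for the local rows, then MatAssemblyBegin()/MatAssemblyEnd() ...
*/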
4151 
4152 /*@
4153      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4154          CSR format.
4155 
4156    Collective
4157 
4158    Input Parameters:
4159 +  comm - MPI communicator
4160 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4161 .  n - This value should be the same as the local size used in creating the
4162        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4163        calculated if N is given). For square matrices n is almost always m.
4164 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4165 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4166 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4167 .   j - column indices
4168 -   a - optional matrix values
4169 
4170    Output Parameter:
4171 .   mat - the matrix
4172 
4173    Level: intermediate
4174 
4175    Notes:
4176        The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4177      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4178      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4179 
4180        The `i` and `j` indices are 0 based, and the `i` entries are offsets into the local `j` array.
4181 
4182        The format used for the sparse matrix input is equivalent to a
4183     row-major ordering, i.e., for the following matrix, the input data expected is
4184     as shown below
4185 
4186        Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`
4187 .vb
4188         1 0 0
4189         2 0 3     P0
4190        -------
4191         4 5 6     P1
4192 
4193      Process0 [P0] rows_owned=[0,1]
4194         i =  {0,1,3}  [size = nrow+1  = 2+1]
4195         j =  {0,0,2}  [size = 3]
4196         v =  {1,2,3}  [size = 3]
4197 
4198      Process1 [P1] rows_owned=[2]
4199         i =  {0,3}    [size = nrow+1  = 1+1]
4200         j =  {0,1,2}  [size = 3]
4201         v =  {4,5,6}  [size = 3]
4202 .ve
4203 
4204 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4205           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4206 @*/
4207 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4208 {
4209   PetscFunctionBegin;
4210   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4211   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4212   PetscCall(MatCreate(comm, mat));
4213   PetscCall(MatSetSizes(*mat, m, n, M, N));
4214   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4215   PetscCall(MatSetType(*mat, MATMPIAIJ));
4216   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4217   PetscFunctionReturn(PETSC_SUCCESS);
4218 }
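
/*
   Example usage of MatCreateMPIAIJWithArrays() (an illustrative sketch, not part of the
   library): a single call that replaces the MatCreate()/MatSetSizes()/MatSetType()/
   MatMPIAIJSetPreallocationCSR() sequence when the CSR data is already split by rank.
   The names nlocal_rows, N, i_local, j_local, and v_local are assumptions standing for
   this rank's share of the data in the layout shown above.

     Mat A;
     PetscCall(MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, nlocal_rows, PETSC_DECIDE, PETSC_DETERMINE, N,
                                         i_local, j_local, v_local, &A));
*/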
4219 
4220 /*@
4221      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4222      CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4223      to `MatCreateMPIAIJWithArrays()`
4224 
4225      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4226 
4227    Collective
4228 
4229    Input Parameters:
4230 +  mat - the matrix
4231 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4232 .  n - This value should be the same as the local size used in creating the
4233        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4234        calculated if N is given). For square matrices n is almost always m.
4235 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4236 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4237 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4238 .  J - column indices
4239 -  v - matrix values
4240 
4241    Level: deprecated
4242 
4243 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4244           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4245 @*/
4246 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4247 {
4248   PetscInt        nnz, i;
4249   PetscBool       nooffprocentries;
4250   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4251   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4252   PetscScalar    *ad, *ao;
4253   PetscInt        ldi, Iii, md;
4254   const PetscInt *Adi = Ad->i;
4255   PetscInt       *ld  = Aij->ld;
4256 
4257   PetscFunctionBegin;
4258   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4259   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4260   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4261   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4262 
4263   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4264   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4265 
4266   for (i = 0; i < m; i++) {
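  /* Each local row of v is stored contiguously as
       [entries left of the diagonal block | diagonal-block entries | entries right of the diagonal block]:
     the first ld[i] entries go to the off-diagonal part B, the next md to the diagonal part A,
     and the remaining nnz - ld[i] - md back to B. */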
4267     nnz = Ii[i + 1] - Ii[i];
4268     Iii = Ii[i];
4269     ldi = ld[i];
4270     md  = Adi[i + 1] - Adi[i];
4271     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4272     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4273     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4274     ad += md;
4275     ao += nnz - md;
4276   }
4277   nooffprocentries      = mat->nooffprocentries;
4278   mat->nooffprocentries = PETSC_TRUE;
4279   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4280   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4281   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4282   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4283   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4284   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4285   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4286   mat->nooffprocentries = nooffprocentries;
4287   PetscFunctionReturn(PETSC_SUCCESS);
4288 }
4289 
4290 /*@
4291      MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4292 
4293    Collective
4294 
4295    Input Parameters:
4296 +  mat - the matrix
4297 -  v - matrix values, stored by row
4298 
4299    Level: intermediate
4300 
4301    Note:
4302    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4303 
4304 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4305           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4306 @*/
4307 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4308 {
4309   PetscInt        nnz, i, m;
4310   PetscBool       nooffprocentries;
4311   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4312   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4313   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4314   PetscScalar    *ad, *ao;
4315   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4316   PetscInt        ldi, Iii, md;
4317   PetscInt       *ld = Aij->ld;
4318 
4319   PetscFunctionBegin;
4320   m = mat->rmap->n;
4321 
4322   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4323   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4324   Iii = 0;
4325   for (i = 0; i < m; i++) {
4326     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4327     ldi = ld[i];
4328     md  = Adi[i + 1] - Adi[i];
4329     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4330     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4331     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4332     ad += md;
4333     ao += nnz - md;
4334     Iii += nnz;
4335   }
4336   nooffprocentries      = mat->nooffprocentries;
4337   mat->nooffprocentries = PETSC_TRUE;
4338   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4339   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4340   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4341   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4342   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4343   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4344   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4345   mat->nooffprocentries = nooffprocentries;
4346   PetscFunctionReturn(PETSC_SUCCESS);
4347 }
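
/*
   Example usage of MatUpdateMPIAIJWithArray() (an illustrative sketch, not part of the
   library): refresh the numerical values of a matrix previously built with
   MatCreateMPIAIJWithArrays() or MatMPIAIJSetPreallocationCSR(). Here A and v_new are
   assumptions: A is the assembled matrix and v_new holds this rank's values in the same
   row-by-row ordering that was used at creation time.

     PetscCall(MatUpdateMPIAIJWithArray(A, v_new));
*/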
4348 
4349 /*@C
4350    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4351    (the default parallel PETSc format).  For good matrix assembly performance
4352    the user should preallocate the matrix storage by setting the parameters
4353    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4354 
4355    Collective
4356 
4357    Input Parameters:
4358 +  comm - MPI communicator
4359 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4360            This value should be the same as the local size used in creating the
4361            y vector for the matrix-vector product y = Ax.
4362 .  n - This value should be the same as the local size used in creating the
4363        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4364        calculated if N is given). For square matrices n is almost always m.
4365 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4366 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4367 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4368            (same value is used for all local rows)
4369 .  d_nnz - array containing the number of nonzeros in the various rows of the
4370            DIAGONAL portion of the local submatrix (possibly different for each row)
4371            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4372            The size of this array is equal to the number of local rows, i.e 'm'.
4373 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4374            submatrix (same value is used for all local rows).
4375 -  o_nnz - array containing the number of nonzeros in the various rows of the
4376            OFF-DIAGONAL portion of the local submatrix (possibly different for
4377            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4378            structure. The size of this array is equal to the number
4379            of local rows, i.e 'm'.
4380 
4381    Output Parameter:
4382 .  A - the matrix
4383 
4384    Options Database Keys:
4385 +  -mat_no_inode  - Do not use inodes
4386 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4387 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4388         See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw, or binary cause the vecscatter to be viewed as a matrix.
4389         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4390 
4391    Level: intermediate
4392 
4393    Notes:
4394    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4395    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4396    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4397 
4398    If the *_nnz parameter is given then the *_nz parameter is ignored
4399 
4400    The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4401    processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4402    storage requirements for this matrix.
4403 
4404    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4405    processor then it must be used on all processors that share the object for
4406    that argument.
4407 
4408    The user MUST specify either the local or global matrix dimensions
4409    (possibly both).
4410 
4411    The parallel matrix is partitioned across processors such that the
4412    first m0 rows belong to process 0, the next m1 rows belong to
4413    process 1, the next m2 rows belong to process 2, etc., where
4414    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4415    values corresponding to an [m x N] submatrix.
4416 
4417    The columns are logically partitioned with the n0 columns belonging
4418    to 0th partition, the next n1 columns belonging to the next
4419    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4420 
4421    The DIAGONAL portion of the local submatrix on any given processor
4422    is the submatrix corresponding to the rows and columns m,n
4423    corresponding to the given processor, i.e., the diagonal matrix on
4424    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4425    etc. The remaining portion of the local submatrix [m x (N-n)]
4426    constitute the OFF-DIAGONAL portion. The example below better
4427    illustrates this concept.
4428 
4429    For a square global matrix we define each processor's diagonal portion
4430    to be its local rows and the corresponding columns (a square submatrix);
4431    each processor's off-diagonal portion encompasses the remainder of the
4432    local matrix (a rectangular submatrix).
4433 
4434    If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4435 
4436    When calling this routine with a single process communicator, a matrix of
4437    type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4438    type of communicator, use the construction mechanism
4439 .vb
4440      MatCreate(...,&A);
4441      MatSetType(A,MATMPIAIJ);
4442      MatSetSizes(A, m,n,M,N);
4443      MatMPIAIJSetPreallocation(A,...);
4444 .ve
4445 
4446    By default, this format uses inodes (identical nodes) when possible.
4447    We search for consecutive rows with the same nonzero structure, thereby
4448    reusing matrix information to achieve increased efficiency.
4449 
4450    Usage:
4451    Consider the following 8x8 matrix with 34 non-zero values, that is
4452    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4453    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4454    as follows
4455 
4456 .vb
4457             1  2  0  |  0  3  0  |  0  4
4458     Proc0   0  5  6  |  7  0  0  |  8  0
4459             9  0 10  | 11  0  0  | 12  0
4460     -------------------------------------
4461            13  0 14  | 15 16 17  |  0  0
4462     Proc1   0 18  0  | 19 20 21  |  0  0
4463             0  0  0  | 22 23  0  | 24  0
4464     -------------------------------------
4465     Proc2  25 26 27  |  0  0 28  | 29  0
4466            30  0  0  | 31 32 33  |  0 34
4467 .ve
4468 
4469    This can be represented as a collection of submatrices as
4470 
4471 .vb
4472       A B C
4473       D E F
4474       G H I
4475 .ve
4476 
4477    Where the submatrices A,B,C are owned by proc0, D,E,F are
4478    owned by proc1, G,H,I are owned by proc2.
4479 
4480    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4481    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4482    The 'M','N' parameters are 8,8, and have the same values on all procs.
4483 
4484    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4485    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4486    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4487    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4488    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4489    matrix, and [DF] as another `MATSEQAIJ` matrix.
4490 
4491    When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4492    allocated for every row of the local diagonal submatrix, and `o_nz`
4493    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4494    One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over the local
4495    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4496    In this case, the values of `d_nz`,`o_nz` are
4497 .vb
4498      proc0  dnz = 2, o_nz = 2
4499      proc1  dnz = 3, o_nz = 2
4500      proc2  dnz = 1, o_nz = 4
4501 .ve
4502    We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4503    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4504    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4505    34 values.
4506 
4507    When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4508    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4509    In the above case the values for `d_nnz`, `o_nnz` are
4510 .vb
4511      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4512      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4513      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4514 .ve
4515    Here the space allocated is the sum of all the above values, i.e., 34, and
4516    hence pre-allocation is perfect.
4517 
4518 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4519           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4520 @*/
4521 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4522 {
4523   PetscMPIInt size;
4524 
4525   PetscFunctionBegin;
4526   PetscCall(MatCreate(comm, A));
4527   PetscCall(MatSetSizes(*A, m, n, M, N));
4528   PetscCallMPI(MPI_Comm_size(comm, &size));
4529   if (size > 1) {
4530     PetscCall(MatSetType(*A, MATMPIAIJ));
4531     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4532   } else {
4533     PetscCall(MatSetType(*A, MATSEQAIJ));
4534     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4535   }
4536   PetscFunctionReturn(PETSC_SUCCESS);
4537 }
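
/*
   Example of the creation paradigm recommended in the notes above (an illustrative sketch,
   not part of the library): MatCreate()/MatSetFromOptions() followed by both preallocation
   calls, so the same code works on one rank (MATSEQAIJ) and on many (MATMPIAIJ). The global
   size 8 and the per-row estimates are assumptions made only for this sketch.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 8, 8));
     PetscCall(MatSetFromOptions(A));
     PetscCall(MatMPIAIJSetPreallocation(A, 3, NULL, 2, NULL));   // used when the type is MATMPIAIJ
     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));            // used when the type is MATSEQAIJ
*/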
4538 
4539 /*MC
4540     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4541 
4542     Synopsis:
4543     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4544 
4545     Not Collective
4546 
4547     Input Parameter:
4548 .   A - the `MATMPIAIJ` matrix
4549 
4550     Output Parameters:
4551 +   Ad - the diagonal portion of the matrix
4552 .   Ao - the off diagonal portion of the matrix
4553 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4554 -   ierr - error code
4555 
4556      Level: advanced
4557 
4558     Note:
4559     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4560 
4561 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4562 M*/
4563 
4564 /*MC
4565     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4566 
4567     Synopsis:
4568     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4569 
4570     Not Collective
4571 
4572     Input Parameters:
4573 +   A - the `MATMPIAIJ` matrix
4574 .   Ad - the diagonal portion of the matrix
4575 .   Ao - the off diagonal portion of the matrix
4576 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4577 -   ierr - error code
4578 
4579      Level: advanced
4580 
4581 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4582 M*/
4583 
4584 /*@C
4585   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4586 
4587   Not Collective
4588 
4589   Input Parameter:
4590 . A - The `MATMPIAIJ` matrix
4591 
4592   Output Parameters:
4593 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4594 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4595 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4596 
4597   Level: intermediate
4598 
4599   Note:
4600   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4601   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4602   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4603   local column numbers to global column numbers in the original matrix.
4604 
4605   Fortran Note:
4606   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4607 
4608 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4609 @*/
4610 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4611 {
4612   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4613   PetscBool   flg;
4614 
4615   PetscFunctionBegin;
4616   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4617   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4618   if (Ad) *Ad = a->A;
4619   if (Ao) *Ao = a->B;
4620   if (colmap) *colmap = a->garray;
4621   PetscFunctionReturn(PETSC_SUCCESS);
4622 }
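
/*
   Example usage of MatMPIAIJGetSeqAIJ() (an illustrative sketch, not part of the library):
   walk the first local row of the off-diagonal block and translate its column indices to
   global columns through colmap, as described in the note above. A is assumed to be an
   assembled MATMPIAIJ matrix with at least one local row.

     Mat                Ad, Ao;
     const PetscInt    *colmap, *cols;
     const PetscScalar *vals;
     PetscInt           k, ncols;

     PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap));
     PetscCall(MatGetRow(Ao, 0, &ncols, &cols, &vals));
     for (k = 0; k < ncols; k++) {
       // cols[k] is a local column of Ao; colmap[cols[k]] is the corresponding global column of A
     }
     PetscCall(MatRestoreRow(Ao, 0, &ncols, &cols, &vals));
*/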
4623 
4624 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4625 {
4626   PetscInt     m, N, i, rstart, nnz, Ii;
4627   PetscInt    *indx;
4628   PetscScalar *values;
4629   MatType      rootType;
4630 
4631   PetscFunctionBegin;
4632   PetscCall(MatGetSize(inmat, &m, &N));
4633   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4634     PetscInt *dnz, *onz, sum, bs, cbs;
4635 
4636     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4637     /* Check sum(n) = N */
4638     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4639     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4640 
4641     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4642     rstart -= m;
4643 
4644     MatPreallocateBegin(comm, m, n, dnz, onz);
4645     for (i = 0; i < m; i++) {
4646       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4647       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4648       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4649     }
4650 
4651     PetscCall(MatCreate(comm, outmat));
4652     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4653     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4654     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4655     PetscCall(MatGetRootType_Private(inmat, &rootType));
4656     PetscCall(MatSetType(*outmat, rootType));
4657     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4658     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4659     MatPreallocateEnd(dnz, onz);
4660     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4661   }
4662 
4663   /* numeric phase */
4664   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4665   for (i = 0; i < m; i++) {
4666     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4667     Ii = i + rstart;
4668     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4669     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4670   }
4671   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4672   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4673   PetscFunctionReturn(PETSC_SUCCESS);
4674 }
4675 
4676 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4677 {
4678   PetscMPIInt        rank;
4679   PetscInt           m, N, i, rstart, nnz;
4680   size_t             len;
4681   const PetscInt    *indx;
4682   PetscViewer        out;
4683   char              *name;
4684   Mat                B;
4685   const PetscScalar *values;
4686 
4687   PetscFunctionBegin;
4688   PetscCall(MatGetLocalSize(A, &m, NULL));
4689   PetscCall(MatGetSize(A, NULL, &N));
4690   /* Should this be the type of the diagonal block of A? */
4691   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4692   PetscCall(MatSetSizes(B, m, N, m, N));
4693   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4694   PetscCall(MatSetType(B, MATSEQAIJ));
4695   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4696   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4697   for (i = 0; i < m; i++) {
4698     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4699     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4700     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4701   }
4702   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4703   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4704 
4705   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4706   PetscCall(PetscStrlen(outfile, &len));
4707   PetscCall(PetscMalloc1(len + 6, &name));
4708   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4709   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4710   PetscCall(PetscFree(name));
4711   PetscCall(MatView(B, out));
4712   PetscCall(PetscViewerDestroy(&out));
4713   PetscCall(MatDestroy(&B));
4714   PetscFunctionReturn(PETSC_SUCCESS);
4715 }
4716 
4717 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4718 {
4719   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4720 
4721   PetscFunctionBegin;
4722   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4723   PetscCall(PetscFree(merge->id_r));
4724   PetscCall(PetscFree(merge->len_s));
4725   PetscCall(PetscFree(merge->len_r));
4726   PetscCall(PetscFree(merge->bi));
4727   PetscCall(PetscFree(merge->bj));
4728   PetscCall(PetscFree(merge->buf_ri[0]));
4729   PetscCall(PetscFree(merge->buf_ri));
4730   PetscCall(PetscFree(merge->buf_rj[0]));
4731   PetscCall(PetscFree(merge->buf_rj));
4732   PetscCall(PetscFree(merge->coi));
4733   PetscCall(PetscFree(merge->coj));
4734   PetscCall(PetscFree(merge->owners_co));
4735   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4736   PetscCall(PetscFree(merge));
4737   PetscFunctionReturn(PETSC_SUCCESS);
4738 }
4739 
4740 #include <../src/mat/utils/freespace.h>
4741 #include <petscbt.h>
4742 
4743 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4744 {
4745   MPI_Comm             comm;
4746   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4747   PetscMPIInt          size, rank, taga, *len_s;
4748   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4749   PetscInt             proc, m;
4750   PetscInt           **buf_ri, **buf_rj;
4751   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4752   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4753   MPI_Request         *s_waits, *r_waits;
4754   MPI_Status          *status;
4755   const MatScalar     *aa, *a_a;
4756   MatScalar          **abuf_r, *ba_i;
4757   Mat_Merge_SeqsToMPI *merge;
4758   PetscContainer       container;
4759 
4760   PetscFunctionBegin;
4761   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4762   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4763 
4764   PetscCallMPI(MPI_Comm_size(comm, &size));
4765   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4766 
4767   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4768   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4769   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4770   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4771   aa = a_a;
4772 
4773   bi     = merge->bi;
4774   bj     = merge->bj;
4775   buf_ri = merge->buf_ri;
4776   buf_rj = merge->buf_rj;
4777 
4778   PetscCall(PetscMalloc1(size, &status));
4779   owners = merge->rowmap->range;
4780   len_s  = merge->len_s;
4781 
4782   /* send and recv matrix values */
4783   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4784   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4785 
4786   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4787   for (proc = 0, k = 0; proc < size; proc++) {
4788     if (!len_s[proc]) continue;
4789     i = owners[proc];
4790     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4791     k++;
4792   }
4793 
4794   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4795   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4796   PetscCall(PetscFree(status));
4797 
4798   PetscCall(PetscFree(s_waits));
4799   PetscCall(PetscFree(r_waits));
4800 
4801   /* insert mat values of mpimat */
4802   PetscCall(PetscMalloc1(N, &ba_i));
4803   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4804 
4805   for (k = 0; k < merge->nrecv; k++) {
4806     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4807     nrows       = *(buf_ri_k[k]);
4808     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4809     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4810   }
4811 
4812   /* set values of ba */
4813   m = merge->rowmap->n;
4814   for (i = 0; i < m; i++) {
4815     arow = owners[rank] + i;
4816     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4817     bnzi = bi[i + 1] - bi[i];
4818     PetscCall(PetscArrayzero(ba_i, bnzi));
4819 
4820     /* add local non-zero vals of this proc's seqmat into ba */
4821     anzi   = ai[arow + 1] - ai[arow];
4822     aj     = a->j + ai[arow];
4823     aa     = a_a + ai[arow];
4824     nextaj = 0;
4825     for (j = 0; nextaj < anzi; j++) {
4826       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4827         ba_i[j] += aa[nextaj++];
4828       }
4829     }
4830 
4831     /* add received vals into ba */
4832     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4833       /* i-th row */
4834       if (i == *nextrow[k]) {
4835         anzi   = *(nextai[k] + 1) - *nextai[k];
4836         aj     = buf_rj[k] + *(nextai[k]);
4837         aa     = abuf_r[k] + *(nextai[k]);
4838         nextaj = 0;
4839         for (j = 0; nextaj < anzi; j++) {
4840           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4841             ba_i[j] += aa[nextaj++];
4842           }
4843         }
4844         nextrow[k]++;
4845         nextai[k]++;
4846       }
4847     }
4848     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4849   }
4850   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4851   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4852   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4853 
4854   PetscCall(PetscFree(abuf_r[0]));
4855   PetscCall(PetscFree(abuf_r));
4856   PetscCall(PetscFree(ba_i));
4857   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4858   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4859   PetscFunctionReturn(PETSC_SUCCESS);
4860 }
4861 
4862 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4863 {
4864   Mat                  B_mpi;
4865   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4866   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4867   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4868   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4869   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4870   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4871   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4872   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4873   MPI_Status          *status;
4874   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4875   PetscBT              lnkbt;
4876   Mat_Merge_SeqsToMPI *merge;
4877   PetscContainer       container;
4878 
4879   PetscFunctionBegin;
4880   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4881 
4882   /* make sure it is a PETSc comm */
4883   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4884   PetscCallMPI(MPI_Comm_size(comm, &size));
4885   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4886 
4887   PetscCall(PetscNew(&merge));
4888   PetscCall(PetscMalloc1(size, &status));
4889 
4890   /* determine row ownership */
4891   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4892   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4893   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4894   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4895   PetscCall(PetscLayoutSetUp(merge->rowmap));
4896   PetscCall(PetscMalloc1(size, &len_si));
4897   PetscCall(PetscMalloc1(size, &merge->len_s));
4898 
4899   m      = merge->rowmap->n;
4900   owners = merge->rowmap->range;
4901 
4902   /* determine the number of messages to send, their lengths */
4903   len_s = merge->len_s;
4904 
4905   len          = 0; /* length of buf_si[] */
4906   merge->nsend = 0;
4907   for (proc = 0; proc < size; proc++) {
4908     len_si[proc] = 0;
4909     if (proc == rank) {
4910       len_s[proc] = 0;
4911     } else {
4912       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4913       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4914     }
4915     if (len_s[proc]) {
4916       merge->nsend++;
4917       nrows = 0;
4918       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4919         if (ai[i + 1] > ai[i]) nrows++;
4920       }
4921       len_si[proc] = 2 * (nrows + 1);
4922       len += len_si[proc];
4923     }
4924   }
4925 
4926   /* determine the number and length of messages to receive for ij-structure */
4927   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4928   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4929 
4930   /* post the Irecv of j-structure */
4931   PetscCall(PetscCommGetNewTag(comm, &tagj));
4932   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4933 
4934   /* post the Isend of j-structure */
4935   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4936 
4937   for (proc = 0, k = 0; proc < size; proc++) {
4938     if (!len_s[proc]) continue;
4939     i = owners[proc];
4940     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4941     k++;
4942   }
4943 
4944   /* receives and sends of j-structure are complete */
4945   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4946   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4947 
4948   /* send and recv i-structure */
4949   PetscCall(PetscCommGetNewTag(comm, &tagi));
4950   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4951 
4952   PetscCall(PetscMalloc1(len + 1, &buf_s));
4953   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4954   for (proc = 0, k = 0; proc < size; proc++) {
4955     if (!len_s[proc]) continue;
4956     /* form outgoing message for i-structure:
4957          buf_si[0]:                 nrows to be sent
4958                [1:nrows]:           row index (global)
4959                [nrows+1:2*nrows+1]: i-structure index
4960     */
4961     nrows       = len_si[proc] / 2 - 1;
4962     buf_si_i    = buf_si + nrows + 1;
4963     buf_si[0]   = nrows;
4964     buf_si_i[0] = 0;
4965     nrows       = 0;
4966     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4967       anzi = ai[i + 1] - ai[i];
4968       if (anzi) {
4969         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4970         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4971         nrows++;
4972       }
4973     }
4974     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4975     k++;
4976     buf_si += len_si[proc];
4977   }
4978 
4979   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4980   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4981 
4982   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4983   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4984 
4985   PetscCall(PetscFree(len_si));
4986   PetscCall(PetscFree(len_ri));
4987   PetscCall(PetscFree(rj_waits));
4988   PetscCall(PetscFree2(si_waits, sj_waits));
4989   PetscCall(PetscFree(ri_waits));
4990   PetscCall(PetscFree(buf_s));
4991   PetscCall(PetscFree(status));
4992 
4993   /* compute a local seq matrix in each processor */
4994   /* allocate bi array and free space for accumulating nonzero column info */
4995   PetscCall(PetscMalloc1(m + 1, &bi));
4996   bi[0] = 0;
4997 
4998   /* create and initialize a linked list */
4999   nlnk = N + 1;
5000   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
5001 
5002   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5003   len = ai[owners[rank + 1]] - ai[owners[rank]];
5004   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
5005 
5006   current_space = free_space;
5007 
5008   /* determine symbolic info for each local row */
5009   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
5010 
5011   for (k = 0; k < merge->nrecv; k++) {
5012     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5013     nrows       = *buf_ri_k[k];
5014     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5015     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5016   }
5017 
5018   MatPreallocateBegin(comm, m, n, dnz, onz);
5019   len = 0;
5020   for (i = 0; i < m; i++) {
5021     bnzi = 0;
5022     /* add local non-zero cols of this proc's seqmat into lnk */
5023     arow = owners[rank] + i;
5024     anzi = ai[arow + 1] - ai[arow];
5025     aj   = a->j + ai[arow];
5026     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5027     bnzi += nlnk;
5028     /* add received col data into lnk */
5029     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5030       if (i == *nextrow[k]) {            /* i-th row */
5031         anzi = *(nextai[k] + 1) - *nextai[k];
5032         aj   = buf_rj[k] + *nextai[k];
5033         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5034         bnzi += nlnk;
5035         nextrow[k]++;
5036         nextai[k]++;
5037       }
5038     }
5039     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5040 
5041     /* if free space is not available, make more free space */
5042     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5043     /* copy data into free space, then initialize lnk */
5044     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5045     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5046 
5047     current_space->array += bnzi;
5048     current_space->local_used += bnzi;
5049     current_space->local_remaining -= bnzi;
5050 
5051     bi[i + 1] = bi[i] + bnzi;
5052   }
5053 
5054   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5055 
5056   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5057   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5058   PetscCall(PetscLLDestroy(lnk, lnkbt));
5059 
5060   /* create symbolic parallel matrix B_mpi */
5061   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5062   PetscCall(MatCreate(comm, &B_mpi));
5063   if (n == PETSC_DECIDE) {
5064     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5065   } else {
5066     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5067   }
5068   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5069   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5070   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5071   MatPreallocateEnd(dnz, onz);
5072   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5073 
5074   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5075   B_mpi->assembled = PETSC_FALSE;
5076   merge->bi        = bi;
5077   merge->bj        = bj;
5078   merge->buf_ri    = buf_ri;
5079   merge->buf_rj    = buf_rj;
5080   merge->coi       = NULL;
5081   merge->coj       = NULL;
5082   merge->owners_co = NULL;
5083 
5084   PetscCall(PetscCommDestroy(&comm));
5085 
5086   /* attach the supporting struct to B_mpi for reuse */
5087   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5088   PetscCall(PetscContainerSetPointer(container, merge));
5089   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5090   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5091   PetscCall(PetscContainerDestroy(&container));
5092   *mpimat = B_mpi;
5093 
5094   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5095   PetscFunctionReturn(PETSC_SUCCESS);
5096 }
5097 
5098 /*@C
5099       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5100                  matrices from each processor
5101 
5102     Collective
5103 
5104    Input Parameters:
5105 +    comm - the communicator the parallel matrix will live on
5106 .    seqmat - the input sequential matrix on each MPI process
5107 .    m - number of local rows (or `PETSC_DECIDE`)
5108 .    n - number of local columns (or `PETSC_DECIDE`)
5109 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5110 
5111    Output Parameter:
5112 .    mpimat - the parallel matrix generated
5113 
5114     Level: advanced
5115 
5116    Note:
5117      The dimensions of the sequential matrix on each process MUST be the same.
5118      The input seqmat is placed in the container "Mat_Merge_SeqsToMPI" and will be
5119      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5120 
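   Example Usage:
     A minimal sketch, assuming each MPI process has already assembled its own `MATSEQAIJ` matrix Aseq (a placeholder name) of identical global dimensions:
.vb
     Mat Aseq, Ampi;
     // ... each process builds and assembles its own Aseq ...
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Ampi));
     // if only the numerical values of Aseq change (same nonzero pattern), the sum can be recomputed in place
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &Ampi));
.ve
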
5121 .seealso: [](chapter_matrices), `Mat`, `MatCreateAIJ()`
5122 @*/
5123 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5124 {
5125   PetscMPIInt size;
5126 
5127   PetscFunctionBegin;
5128   PetscCallMPI(MPI_Comm_size(comm, &size));
5129   if (size == 1) {
5130     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5131     if (scall == MAT_INITIAL_MATRIX) {
5132       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5133     } else {
5134       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5135     }
5136     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5137     PetscFunctionReturn(PETSC_SUCCESS);
5138   }
5139   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5140   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5141   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5142   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5143   PetscFunctionReturn(PETSC_SUCCESS);
5144 }
5145 
5146 /*@
5147      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking its local rows and putting them into a sequential matrix with
5148           mlocal rows and n columns, where mlocal is the local row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5149           with `MatGetSize()`
5150 
5151     Not Collective
5152 
5153    Input Parameter:
5154 .    A - the matrix
5155 
5156    Output Parameter:
5157 .    A_loc - the local sequential matrix generated
5158 
5159     Level: developer
5160 
5161    Notes:
5162      In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single sequential matrix.
5163 
5164      Destroy the matrix with `MatDestroy()`
5165 
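   Example Usage:
     A minimal sketch, assuming A is an already assembled AIJ matrix (a placeholder name):
.vb
     Mat Aloc;
     PetscCall(MatAIJGetLocalMat(A, &Aloc));
     // ... use the sequential matrix Aloc ...
     PetscCall(MatDestroy(&Aloc));
.ve
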
5166 .seealso: [](chapter_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5167 @*/
5168 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5169 {
5170   PetscBool mpi;
5171 
5172   PetscFunctionBegin;
5173   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5174   if (mpi) {
5175     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5176   } else {
5177     *A_loc = A;
5178     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5179   }
5180   PetscFunctionReturn(PETSC_SUCCESS);
5181 }
5182 
5183 /*@
5184      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5185           mlocal rows and n columns, where mlocal is the local row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5186           with `MatGetSize()`
5187 
5188     Not Collective
5189 
5190    Input Parameters:
5191 +    A - the matrix
5192 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5193 
5194    Output Parameter:
5195 .    A_loc - the local sequential matrix generated
5196 
5197     Level: developer
5198 
5199    Notes:
5200      In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5201 
5202      When the communicator associated with `A` has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A`.
5203      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called.
5204      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5205      modify the values of the returned `A_loc`.
5206 
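   Example Usage:
     A minimal sketch, assuming A is an already assembled `MATMPIAIJ` matrix (a placeholder name):
.vb
     Mat Aloc;
     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
     // ... later, after the values of A change but its nonzero pattern stays the same ...
     PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &Aloc));
     PetscCall(MatDestroy(&Aloc));
.ve
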
5207 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5208 @*/
5209 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5210 {
5211   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5212   Mat_SeqAIJ        *mat, *a, *b;
5213   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5214   const PetscScalar *aa, *ba, *aav, *bav;
5215   PetscScalar       *ca, *cam;
5216   PetscMPIInt        size;
5217   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5218   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5219   PetscBool          match;
5220 
5221   PetscFunctionBegin;
5222   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5223   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5224   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5225   if (size == 1) {
5226     if (scall == MAT_INITIAL_MATRIX) {
5227       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5228       *A_loc = mpimat->A;
5229     } else if (scall == MAT_REUSE_MATRIX) {
5230       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5231     }
5232     PetscFunctionReturn(PETSC_SUCCESS);
5233   }
5234 
5235   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5236   a  = (Mat_SeqAIJ *)(mpimat->A)->data;
5237   b  = (Mat_SeqAIJ *)(mpimat->B)->data;
5238   ai = a->i;
5239   aj = a->j;
5240   bi = b->i;
5241   bj = b->j;
5242   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5243   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5244   aa = aav;
5245   ba = bav;
5246   if (scall == MAT_INITIAL_MATRIX) {
5247     PetscCall(PetscMalloc1(1 + am, &ci));
5248     ci[0] = 0;
5249     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5250     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5251     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5252     k = 0;
5253     for (i = 0; i < am; i++) {
5254       ncols_o = bi[i + 1] - bi[i];
5255       ncols_d = ai[i + 1] - ai[i];
5256       /* off-diagonal portion of A */
5257       for (jo = 0; jo < ncols_o; jo++) {
5258         col = cmap[*bj];
5259         if (col >= cstart) break;
5260         cj[k] = col;
5261         bj++;
5262         ca[k++] = *ba++;
5263       }
5264       /* diagonal portion of A */
5265       for (j = 0; j < ncols_d; j++) {
5266         cj[k]   = cstart + *aj++;
5267         ca[k++] = *aa++;
5268       }
5269       /* off-diagonal portion of A */
5270       for (j = jo; j < ncols_o; j++) {
5271         cj[k]   = cmap[*bj++];
5272         ca[k++] = *ba++;
5273       }
5274     }
5275     /* put together the new matrix */
5276     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5277     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5278     /* Since these are PETSc arrays, change flags to free them as necessary. */
5279     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5280     mat->free_a  = PETSC_TRUE;
5281     mat->free_ij = PETSC_TRUE;
5282     mat->nonew   = 0;
5283   } else if (scall == MAT_REUSE_MATRIX) {
5284     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5285     ci  = mat->i;
5286     cj  = mat->j;
5287     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5288     for (i = 0; i < am; i++) {
5289       /* off-diagonal portion of A */
5290       ncols_o = bi[i + 1] - bi[i];
5291       for (jo = 0; jo < ncols_o; jo++) {
5292         col = cmap[*bj];
5293         if (col >= cstart) break;
5294         *cam++ = *ba++;
5295         bj++;
5296       }
5297       /* diagonal portion of A */
5298       ncols_d = ai[i + 1] - ai[i];
5299       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5300       /* off-diagonal portion of A */
5301       for (j = jo; j < ncols_o; j++) {
5302         *cam++ = *ba++;
5303         bj++;
5304       }
5305     }
5306     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5307   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5308   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5309   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5310   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5311   PetscFunctionReturn(PETSC_SUCCESS);
5312 }
5313 
5314 /*@
5315      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5316           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5317 
5318     Not Collective
5319 
5320    Input Parameters:
5321 +    A - the matrix
5322 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5323 
5324    Output Parameters:
5325 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5326 -    A_loc - the local sequential matrix generated
5327 
5328     Level: developer
5329 
5330    Note:
5331      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5332      part, followed by those associated with the off-diagonal part (in its local ordering)
5333 
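   Example Usage:
     A minimal sketch, assuming A is an already assembled `MATMPIAIJ` matrix (a placeholder name):
.vb
     Mat Aloc;
     IS  glob;
     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &Aloc));
     // glob maps the local column ordering of Aloc (diagonal columns first, then off-diagonal) to global column indices
     PetscCall(ISDestroy(&glob));
     PetscCall(MatDestroy(&Aloc));
.ve
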
5334 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5335 @*/
5336 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5337 {
5338   Mat             Ao, Ad;
5339   const PetscInt *cmap;
5340   PetscMPIInt     size;
5341   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5342 
5343   PetscFunctionBegin;
5344   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5345   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5346   if (size == 1) {
5347     if (scall == MAT_INITIAL_MATRIX) {
5348       PetscCall(PetscObjectReference((PetscObject)Ad));
5349       *A_loc = Ad;
5350     } else if (scall == MAT_REUSE_MATRIX) {
5351       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5352     }
5353     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5354     PetscFunctionReturn(PETSC_SUCCESS);
5355   }
5356   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5357   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5358   if (f) {
5359     PetscCall((*f)(A, scall, glob, A_loc));
5360   } else {
5361     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5362     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5363     Mat_SeqAIJ        *c;
5364     PetscInt          *ai = a->i, *aj = a->j;
5365     PetscInt          *bi = b->i, *bj = b->j;
5366     PetscInt          *ci, *cj;
5367     const PetscScalar *aa, *ba;
5368     PetscScalar       *ca;
5369     PetscInt           i, j, am, dn, on;
5370 
5371     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5372     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5373     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5374     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5375     if (scall == MAT_INITIAL_MATRIX) {
5376       PetscInt k;
5377       PetscCall(PetscMalloc1(1 + am, &ci));
5378       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5379       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5380       ci[0] = 0;
5381       for (i = 0, k = 0; i < am; i++) {
5382         const PetscInt ncols_o = bi[i + 1] - bi[i];
5383         const PetscInt ncols_d = ai[i + 1] - ai[i];
5384         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5385         /* diagonal portion of A */
5386         for (j = 0; j < ncols_d; j++, k++) {
5387           cj[k] = *aj++;
5388           ca[k] = *aa++;
5389         }
5390         /* off-diagonal portion of A */
5391         for (j = 0; j < ncols_o; j++, k++) {
5392           cj[k] = dn + *bj++;
5393           ca[k] = *ba++;
5394         }
5395       }
5396       /* put together the new matrix */
5397       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5398       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5399       /* Since these are PETSc arrays, change flags to free them as necessary. */
5400       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5401       c->free_a  = PETSC_TRUE;
5402       c->free_ij = PETSC_TRUE;
5403       c->nonew   = 0;
5404       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5405     } else if (scall == MAT_REUSE_MATRIX) {
5406       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5407       for (i = 0; i < am; i++) {
5408         const PetscInt ncols_d = ai[i + 1] - ai[i];
5409         const PetscInt ncols_o = bi[i + 1] - bi[i];
5410         /* diagonal portion of A */
5411         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5412         /* off-diagonal portion of A */
5413         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5414       }
5415       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5416     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5417     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5418     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5419     if (glob) {
5420       PetscInt cst, *gidx;
5421 
5422       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5423       PetscCall(PetscMalloc1(dn + on, &gidx));
5424       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5425       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5426       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5427     }
5428   }
5429   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5430   PetscFunctionReturn(PETSC_SUCCESS);
5431 }
5432 
5433 /*@C
5434      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5435 
5436     Not Collective
5437 
5438    Input Parameters:
5439 +    A - the matrix
5440 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5441 .    row - index set of rows to extract (or `NULL`)
5442 -    col - index set of columns to extract (or `NULL`)
5443 
5444    Output Parameter:
5445 .    A_loc - the local sequential matrix generated
5446 
5447     Level: developer
5448 
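   Example Usage:
     A minimal sketch, assuming A is an already assembled `MATMPIAIJ` matrix (a placeholder name); passing NULL for row and col extracts all local rows and the nonzero columns:
.vb
     Mat Aloc;
     PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &Aloc));
     // ... use the condensed sequential matrix Aloc ...
     PetscCall(MatDestroy(&Aloc));
.ve
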
5449 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5450 @*/
5451 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5452 {
5453   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5454   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5455   IS          isrowa, iscola;
5456   Mat        *aloc;
5457   PetscBool   match;
5458 
5459   PetscFunctionBegin;
5460   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5461   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5462   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5463   if (!row) {
5464     start = A->rmap->rstart;
5465     end   = A->rmap->rend;
5466     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5467   } else {
5468     isrowa = *row;
5469   }
5470   if (!col) {
5471     start = A->cmap->rstart;
5472     cmap  = a->garray;
5473     nzA   = a->A->cmap->n;
5474     nzB   = a->B->cmap->n;
5475     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5476     ncols = 0;
5477     for (i = 0; i < nzB; i++) {
5478       if (cmap[i] < start) idx[ncols++] = cmap[i];
5479       else break;
5480     }
5481     imark = i;
5482     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5483     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5484     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5485   } else {
5486     iscola = *col;
5487   }
5488   if (scall != MAT_INITIAL_MATRIX) {
5489     PetscCall(PetscMalloc1(1, &aloc));
5490     aloc[0] = *A_loc;
5491   }
5492   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5493   if (!col) { /* attach global id of condensed columns */
5494     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5495   }
5496   *A_loc = aloc[0];
5497   PetscCall(PetscFree(aloc));
5498   if (!row) PetscCall(ISDestroy(&isrowa));
5499   if (!col) PetscCall(ISDestroy(&iscola));
5500   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5501   PetscFunctionReturn(PETSC_SUCCESS);
5502 }
5503 
5504 /*
5505  * Create a sequential AIJ matrix based on row indices: a whole row (all of its columns) is extracted once its index is matched.
5506  * A row could be local or remote. The routine is designed to be memory scalable, so nothing is based
5507  * on a global size.
5508  * */
5509 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5510 {
5511   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5512   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
5513   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5514   PetscMPIInt            owner;
5515   PetscSFNode           *iremote, *oiremote;
5516   const PetscInt        *lrowindices;
5517   PetscSF                sf, osf;
5518   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5519   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5520   MPI_Comm               comm;
5521   ISLocalToGlobalMapping mapping;
5522   const PetscScalar     *pd_a, *po_a;
5523 
5524   PetscFunctionBegin;
5525   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5526   /* plocalsize is the number of roots
5527    * nrows is the number of leaves
5528    * */
5529   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5530   PetscCall(ISGetLocalSize(rows, &nrows));
5531   PetscCall(PetscCalloc1(nrows, &iremote));
5532   PetscCall(ISGetIndices(rows, &lrowindices));
5533   for (i = 0; i < nrows; i++) {
5534     /* Find a remote index and an owner for a row
5535      * The row could be local or remote
5536      * */
5537     owner = 0;
5538     lidx  = 0;
5539     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5540     iremote[i].index = lidx;
5541     iremote[i].rank  = owner;
5542   }
5543   /* Create SF to communicate how many nonzero columns for each row */
5544   PetscCall(PetscSFCreate(comm, &sf));
5545   /* SF will figure out the number of nonzero columns for each row, and their
5546    * offsets
5547    * */
5548   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5549   PetscCall(PetscSFSetFromOptions(sf));
5550   PetscCall(PetscSFSetUp(sf));
5551 
5552   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5553   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5554   PetscCall(PetscCalloc1(nrows, &pnnz));
5555   roffsets[0] = 0;
5556   roffsets[1] = 0;
5557   for (i = 0; i < plocalsize; i++) {
5558     /* diag */
5559     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5560     /* off diag */
5561     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5562     /* compute offsets so that we know the relative location of each row */
5563     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5564     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5565   }
5566   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5567   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5568   /* 'r' means root, and 'l' means leaf */
5569   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5570   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5571   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5572   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5573   PetscCall(PetscSFDestroy(&sf));
5574   PetscCall(PetscFree(roffsets));
5575   PetscCall(PetscFree(nrcols));
5576   dntotalcols = 0;
5577   ontotalcols = 0;
5578   ncol        = 0;
5579   for (i = 0; i < nrows; i++) {
5580     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5581     ncol    = PetscMax(pnnz[i], ncol);
5582     /* diag */
5583     dntotalcols += nlcols[i * 2 + 0];
5584     /* off diag */
5585     ontotalcols += nlcols[i * 2 + 1];
5586   }
5587   /* We do not need to figure out the exact number of columns
5588    * since all the calculations will be done by going through the raw data
5589    * */
5590   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5591   PetscCall(MatSetUp(*P_oth));
5592   PetscCall(PetscFree(pnnz));
5593   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5594   /* diag */
5595   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5596   /* off diag */
5597   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5598   /* diag */
5599   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5600   /* off diag */
5601   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5602   dntotalcols = 0;
5603   ontotalcols = 0;
5604   ntotalcols  = 0;
5605   for (i = 0; i < nrows; i++) {
5606     owner = 0;
5607     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5608     /* Set iremote for diag matrix */
5609     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5610       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5611       iremote[dntotalcols].rank  = owner;
5612       /* P_oth is seqAIJ so ilocal needs to point to the first part of memory */
5613       ilocal[dntotalcols++] = ntotalcols++;
5614     }
5615     /* off diag */
5616     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5617       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5618       oiremote[ontotalcols].rank  = owner;
5619       oilocal[ontotalcols++]      = ntotalcols++;
5620     }
5621   }
5622   PetscCall(ISRestoreIndices(rows, &lrowindices));
5623   PetscCall(PetscFree(loffsets));
5624   PetscCall(PetscFree(nlcols));
5625   PetscCall(PetscSFCreate(comm, &sf));
5626   /* P serves as roots and P_oth is leaves
5627    * Diag matrix
5628    * */
5629   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5630   PetscCall(PetscSFSetFromOptions(sf));
5631   PetscCall(PetscSFSetUp(sf));
5632 
5633   PetscCall(PetscSFCreate(comm, &osf));
5634   /* Off diag */
5635   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5636   PetscCall(PetscSFSetFromOptions(osf));
5637   PetscCall(PetscSFSetUp(osf));
5638   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5639   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5640   /* We operate on the matrix internal data for saving memory */
5641   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5642   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5643   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5644   /* Convert to global indices for diag matrix */
5645   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5646   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5647   /* We want P_oth to store global indices */
5648   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5649   /* Use memory scalable approach */
5650   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5651   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5652   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5653   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5654   /* Convert back to local indices */
5655   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5656   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5657   nout = 0;
5658   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5659   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5660   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5661   /* Exchange values */
5662   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5663   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5664   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5665   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5666   /* Stop PETSc from shrinking memory */
5667   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5668   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5669   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5670   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5671   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5672   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5673   PetscCall(PetscSFDestroy(&sf));
5674   PetscCall(PetscSFDestroy(&osf));
5675   PetscFunctionReturn(PETSC_SUCCESS);
5676 }
5677 
5678 /*
5679  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5680  * This supports MPIAIJ and MAIJ
5681  * */
5682 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5683 {
5684   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5685   Mat_SeqAIJ *p_oth;
5686   IS          rows, map;
5687   PetscHMapI  hamp;
5688   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5689   MPI_Comm    comm;
5690   PetscSF     sf, osf;
5691   PetscBool   has;
5692 
5693   PetscFunctionBegin;
5694   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5695   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5696   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5697    *  and then create a submatrix (that often is an overlapping matrix)
5698    * */
5699   if (reuse == MAT_INITIAL_MATRIX) {
5700     /* Use a hash table to figure out unique keys */
5701     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5702     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5703     count = 0;
5704     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5705     for (i = 0; i < a->B->cmap->n; i++) {
5706       key = a->garray[i] / dof;
5707       PetscCall(PetscHMapIHas(hamp, key, &has));
5708       if (!has) {
5709         mapping[i] = count;
5710         PetscCall(PetscHMapISet(hamp, key, count++));
5711       } else {
5712         /* Current 'i' has the same value the previous step */
5713         mapping[i] = count - 1;
5714       }
5715     }
5716     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5717     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5718     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5719     PetscCall(PetscCalloc1(htsize, &rowindices));
5720     off = 0;
5721     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5722     PetscCall(PetscHMapIDestroy(&hamp));
5723     PetscCall(PetscSortInt(htsize, rowindices));
5724     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5725     /* In case the matrix was already created but the user wants to recreate it */
5726     PetscCall(MatDestroy(P_oth));
5727     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5728     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5729     PetscCall(ISDestroy(&map));
5730     PetscCall(ISDestroy(&rows));
5731   } else if (reuse == MAT_REUSE_MATRIX) {
5732     /* If the matrix was already created, we simply update its values using the SF objects
5733      * that were attached to the matrix earlier.
5734      */
5735     const PetscScalar *pd_a, *po_a;
5736 
5737     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5738     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5739     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5740     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5741     /* Update values in place */
5742     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5743     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5744     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5745     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5746     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5747     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5748     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5749     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5750   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5751   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5752   PetscFunctionReturn(PETSC_SUCCESS);
5753 }
5754 
5755 /*@C
5756   MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` that correspond to the nonzero columns of local `A`
5757 
5758   Collective
5759 
5760   Input Parameters:
5761 + A - the first matrix in `MATMPIAIJ` format
5762 . B - the second matrix in `MATMPIAIJ` format
5763 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5764 
5765   Output Parameters:
5766 + rowb - On input, index set of rows of B to extract (or `NULL`); modified on output
5767 . colb - On input, index set of columns of B to extract (or `NULL`); modified on output
5768 - B_seq - the sequential matrix generated
5769 
5770   Level: developer
5771 
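  Example Usage:
    A minimal sketch, assuming A and B are already assembled `MATMPIAIJ` matrices with compatible layouts (placeholder names):
.vb
    IS  rowb = NULL, colb = NULL;
    Mat Bseq = NULL;
    PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &Bseq));
    // ... after the values of B change (same pattern), reuse the saved index sets and matrix ...
    PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &Bseq));
    PetscCall(ISDestroy(&rowb));
    PetscCall(ISDestroy(&colb));
    PetscCall(MatDestroy(&Bseq));
.ve
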
5772 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5773 @*/
5774 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5775 {
5776   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5777   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5778   IS          isrowb, iscolb;
5779   Mat        *bseq = NULL;
5780 
5781   PetscFunctionBegin;
5782   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5783              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5784   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5785 
5786   if (scall == MAT_INITIAL_MATRIX) {
5787     start = A->cmap->rstart;
5788     cmap  = a->garray;
5789     nzA   = a->A->cmap->n;
5790     nzB   = a->B->cmap->n;
5791     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5792     ncols = 0;
5793     for (i = 0; i < nzB; i++) { /* row < local row index */
5794       if (cmap[i] < start) idx[ncols++] = cmap[i];
5795       else break;
5796     }
5797     imark = i;
5798     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5799     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5800     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5801     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5802   } else {
5803     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5804     isrowb = *rowb;
5805     iscolb = *colb;
5806     PetscCall(PetscMalloc1(1, &bseq));
5807     bseq[0] = *B_seq;
5808   }
5809   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5810   *B_seq = bseq[0];
5811   PetscCall(PetscFree(bseq));
5812   if (!rowb) {
5813     PetscCall(ISDestroy(&isrowb));
5814   } else {
5815     *rowb = isrowb;
5816   }
5817   if (!colb) {
5818     PetscCall(ISDestroy(&iscolb));
5819   } else {
5820     *colb = iscolb;
5821   }
5822   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5823   PetscFunctionReturn(PETSC_SUCCESS);
5824 }
5825 
5826 /*
5827     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
5828     of the OFF-DIAGONAL portion of local A
5829 
5830     Collective
5831 
5832    Input Parameters:
5833 +    A,B - the matrices in `MATMPIAIJ` format
5834 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5835 
5836    Output Parameters:
5837 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5838 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5839 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5840 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5841 
5842     Developer Note:
5843     This directly accesses information inside the VecScatter associated with the matrix-vector product
5844      for this matrix. This is not desirable.
5845 
5846     Level: developer
5847 
5848 */
5849 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5850 {
5851   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5852   Mat_SeqAIJ        *b_oth;
5853   VecScatter         ctx;
5854   MPI_Comm           comm;
5855   const PetscMPIInt *rprocs, *sprocs;
5856   const PetscInt    *srow, *rstarts, *sstarts;
5857   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5858   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5859   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5860   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5861   PetscMPIInt        size, tag, rank, nreqs;
5862 
5863   PetscFunctionBegin;
5864   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5865   PetscCallMPI(MPI_Comm_size(comm, &size));
5866 
5867   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5868              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5869   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5870   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5871 
5872   if (size == 1) {
5873     startsj_s = NULL;
5874     bufa_ptr  = NULL;
5875     *B_oth    = NULL;
5876     PetscFunctionReturn(PETSC_SUCCESS);
5877   }
5878 
5879   ctx = a->Mvctx;
5880   tag = ((PetscObject)ctx)->tag;
5881 
5882   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5883   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5884   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5885   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5886   PetscCall(PetscMalloc1(nreqs, &reqs));
5887   rwaits = reqs;
5888   swaits = reqs + nrecvs;
5889 
5890   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5891   if (scall == MAT_INITIAL_MATRIX) {
5892     /* i-array */
5893     /*  post receives */
5894     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5895     for (i = 0; i < nrecvs; i++) {
5896       rowlen = rvalues + rstarts[i] * rbs;
5897       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5898       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5899     }
5900 
5901     /* pack the outgoing message */
5902     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5903 
5904     sstartsj[0] = 0;
5905     rstartsj[0] = 0;
5906     len         = 0; /* total length of j or a array to be sent */
5907     if (nsends) {
5908       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5909       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5910     }
5911     for (i = 0; i < nsends; i++) {
5912       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5913       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5914       for (j = 0; j < nrows; j++) {
5915         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5916         for (l = 0; l < sbs; l++) {
5917           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5918 
5919           rowlen[j * sbs + l] = ncols;
5920 
5921           len += ncols;
5922           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5923         }
5924         k++;
5925       }
5926       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5927 
5928       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5929     }
5930     /* recvs and sends of i-array are completed */
5931     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5932     PetscCall(PetscFree(svalues));
5933 
5934     /* allocate buffers for sending j and a arrays */
5935     PetscCall(PetscMalloc1(len + 1, &bufj));
5936     PetscCall(PetscMalloc1(len + 1, &bufa));
5937 
5938     /* create i-array of B_oth */
5939     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5940 
5941     b_othi[0] = 0;
5942     len       = 0; /* total length of j or a array to be received */
5943     k         = 0;
5944     for (i = 0; i < nrecvs; i++) {
5945       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5946       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5947       for (j = 0; j < nrows; j++) {
5948         b_othi[k + 1] = b_othi[k] + rowlen[j];
5949         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5950         k++;
5951       }
5952       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5953     }
5954     PetscCall(PetscFree(rvalues));
5955 
5956     /* allocate space for j and a arrays of B_oth */
5957     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5958     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5959 
5960     /* j-array */
5961     /*  post receives of j-array */
5962     for (i = 0; i < nrecvs; i++) {
5963       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5964       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5965     }
5966 
5967     /* pack the outgoing message j-array */
5968     if (nsends) k = sstarts[0];
5969     for (i = 0; i < nsends; i++) {
5970       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5971       bufJ  = bufj + sstartsj[i];
5972       for (j = 0; j < nrows; j++) {
5973         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5974         for (ll = 0; ll < sbs; ll++) {
5975           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5976           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5977           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5978         }
5979       }
5980       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5981     }
5982 
5983     /* recvs and sends of j-array are completed */
5984     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5985   } else if (scall == MAT_REUSE_MATRIX) {
5986     sstartsj = *startsj_s;
5987     rstartsj = *startsj_r;
5988     bufa     = *bufa_ptr;
5989     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5990     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5991   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5992 
5993   /* a-array */
5994   /*  post receives of a-array */
5995   for (i = 0; i < nrecvs; i++) {
5996     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5997     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5998   }
5999 
6000   /* pack the outgoing message a-array */
6001   if (nsends) k = sstarts[0];
6002   for (i = 0; i < nsends; i++) {
6003     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
6004     bufA  = bufa + sstartsj[i];
6005     for (j = 0; j < nrows; j++) {
6006       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6007       for (ll = 0; ll < sbs; ll++) {
6008         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6009         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6010         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6011       }
6012     }
6013     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6014   }
6015   /* recvs and sends of a-array are completed */
6016   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6017   PetscCall(PetscFree(reqs));
6018 
6019   if (scall == MAT_INITIAL_MATRIX) {
6020     /* put together the new matrix */
6021     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6022 
6023     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6024     /* Since these are PETSc arrays, change flags to free them as necessary. */
6025     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6026     b_oth->free_a  = PETSC_TRUE;
6027     b_oth->free_ij = PETSC_TRUE;
6028     b_oth->nonew   = 0;
6029 
6030     PetscCall(PetscFree(bufj));
6031     if (!startsj_s || !bufa_ptr) {
6032       PetscCall(PetscFree2(sstartsj, rstartsj));
6033       PetscCall(PetscFree(bufa_ptr));
6034     } else {
6035       *startsj_s = sstartsj;
6036       *startsj_r = rstartsj;
6037       *bufa_ptr  = bufa;
6038     }
6039   } else if (scall == MAT_REUSE_MATRIX) {
6040     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6041   }
6042 
6043   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6044   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6045   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6046   PetscFunctionReturn(PETSC_SUCCESS);
6047 }
6048 
6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6052 #if defined(PETSC_HAVE_MKL_SPARSE)
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6054 #endif
6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6057 #if defined(PETSC_HAVE_ELEMENTAL)
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6059 #endif
6060 #if defined(PETSC_HAVE_SCALAPACK)
6061 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6062 #endif
6063 #if defined(PETSC_HAVE_HYPRE)
6064 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6065 #endif
6066 #if defined(PETSC_HAVE_CUDA)
6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6068 #endif
6069 #if defined(PETSC_HAVE_HIP)
6070 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6071 #endif
6072 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6073 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6074 #endif
6075 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6076 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6077 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6078 
6079 /*
6080     Computes (B'*A')' since computing B*A directly is untenable
6081 
6082                n                       p                          p
6083         [             ]       [             ]         [                 ]
6084       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6085         [             ]       [             ]         [                 ]
6086 
6087 */
6088 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6089 {
6090   Mat At, Bt, Ct;
6091 
6092   PetscFunctionBegin;
6093   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6094   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6095   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6096   PetscCall(MatDestroy(&At));
6097   PetscCall(MatDestroy(&Bt));
6098   PetscCall(MatTransposeSetPrecursor(Ct, C));
6099   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6100   PetscCall(MatDestroy(&Ct));
6101   PetscFunctionReturn(PETSC_SUCCESS);
6102 }
6103 
6104 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6105 {
6106   PetscBool cisdense;
6107 
6108   PetscFunctionBegin;
6109   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6110   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6111   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6112   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6113   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6114   PetscCall(MatSetUp(C));
6115 
6116   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6117   PetscFunctionReturn(PETSC_SUCCESS);
6118 }
6119 
6120 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6121 {
6122   Mat_Product *product = C->product;
6123   Mat          A = product->A, B = product->B;
6124 
6125   PetscFunctionBegin;
6126   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6127              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6128   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6129   C->ops->productsymbolic = MatProductSymbolic_AB;
6130   PetscFunctionReturn(PETSC_SUCCESS);
6131 }
6132 
6133 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6134 {
6135   Mat_Product *product = C->product;
6136 
6137   PetscFunctionBegin;
6138   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6139   PetscFunctionReturn(PETSC_SUCCESS);
6140 }
6141 
6142 /*
6143    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6144 
6145   Input Parameters:
6146 
6147     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6148     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6149 
6150     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6151 
6152     For Set1, j1[] contains column indices of the nonzeros.
6153     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6154     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6155     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6156 
6157     Similar for Set2.
6158 
6159     This routine merges the two sets of nonzeros row by row and removes repeats.
6160 
6161   Output Parameters: (memory is allocated by the caller)
6162 
6163     i[],j[]: the CSR of the merged matrix, which has m rows.
6164     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6165     imap2[]: similar to imap1[], but for Set2.
6166     Note we order nonzeros row-by-row and from left to right.
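    Example (one row, hypothetical data):
      Set1: j1 = [1,1,3], jmap1 = [0,2,3]  (unique nonzeros: columns 1 and 3)
      Set2: j2 = [2,3],   jmap2 = [0,1,2]  (unique nonzeros: columns 2 and 3)
      merged row: j = [1,2,3], i = [0,3], imap1 = [0,2], imap2 = [1,2]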
6167 */
6168 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6169 {
6170   PetscInt   r, m; /* Row index of mat */
6171   PetscCount t, t1, t2, b1, e1, b2, e2;
6172 
6173   PetscFunctionBegin;
6174   PetscCall(MatGetLocalSize(mat, &m, NULL));
6175   t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6176   i[0]        = 0;
6177   for (r = 0; r < m; r++) { /* Do row by row merging */
6178     b1 = rowBegin1[r];
6179     e1 = rowEnd1[r];
6180     b2 = rowBegin2[r];
6181     e2 = rowEnd2[r];
6182     while (b1 < e1 && b2 < e2) {
6183       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6184         j[t]      = j1[b1];
6185         imap1[t1] = t;
6186         imap2[t2] = t;
6187         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6188         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6189         t1++;
6190         t2++;
6191         t++;
6192       } else if (j1[b1] < j2[b2]) {
6193         j[t]      = j1[b1];
6194         imap1[t1] = t;
6195         b1 += jmap1[t1 + 1] - jmap1[t1];
6196         t1++;
6197         t++;
6198       } else {
6199         j[t]      = j2[b2];
6200         imap2[t2] = t;
6201         b2 += jmap2[t2 + 1] - jmap2[t2];
6202         t2++;
6203         t++;
6204       }
6205     }
6206     /* Merge the remaining in either j1[] or j2[] */
6207     while (b1 < e1) {
6208       j[t]      = j1[b1];
6209       imap1[t1] = t;
6210       b1 += jmap1[t1 + 1] - jmap1[t1];
6211       t1++;
6212       t++;
6213     }
6214     while (b2 < e2) {
6215       j[t]      = j2[b2];
6216       imap2[t2] = t;
6217       b2 += jmap2[t2 + 1] - jmap2[t2];
6218       t2++;
6219       t++;
6220     }
6221     i[r + 1] = t;
6222   }
6223   PetscFunctionReturn(PETSC_SUCCESS);
6224 }
6225 
6226 /*
6227   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6228 
6229   Input Parameters:
6230     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6231     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6232       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6233 
6234       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6235       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6236 
6237   Output Parameters:
6238     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6239     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6240       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6241       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6242 
6243     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6244       Atot: number of entries belonging to the diagonal block.
6245       Annz: number of unique nonzeros belonging to the diagonal block.
6246       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6247         repeats (i.e., same 'i,j' pair).
6248       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6249         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6250 
6253 
6254     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6255 
6256     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
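    Example (one local row, hypothetical data, with diagonal column range [cstart,cend) = [10,20)):
      input:  j = [35, 12, 12, 3], perm = [0, 1, 2, 3]
      after sorting/splitting: j = [12, 12, 3, 35], rowBegin = 0, rowMid = 2, rowEnd = 4
      diagonal block:     Atot = 2, Annz = 1, Aperm = [1, 2], Ajmap = [0, 2]
      off-diagonal block: Btot = 2, Bnnz = 2, Bperm = [3, 0], Bjmap = [0, 1, 2]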
6257 */
6258 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6259 {
6260   PetscInt    cstart, cend, rstart, rend, row, col;
6261   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6262   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6263   PetscCount  k, m, p, q, r, s, mid;
6264   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6265 
6266   PetscFunctionBegin;
6267   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6268   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6269   m = rend - rstart;
6270 
6271   for (k = 0; k < n; k++) {
6272     if (i[k] >= 0) break;
6273   } /* Skip negative rows */
6274 
6275   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6276      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6277   */
6278   while (k < n) {
6279     row = i[k];
6280     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6281     for (s = k; s < n; s++)
6282       if (i[s] != row) break;
6283     for (p = k; p < s; p++) {
6284       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6285       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6286     }
6287     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6288     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6289     rowBegin[row - rstart] = k;
6290     rowMid[row - rstart]   = mid;
6291     rowEnd[row - rstart]   = s;
6292 
6293     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6294     Atot += mid - k;
6295     Btot += s - mid;
6296 
6297     /* Count unique nonzeros of this diag/offdiag row */
6298     for (p = k; p < mid;) {
6299       col = j[p];
6300       do {
6301         j[p] += PETSC_MAX_INT; /* Revert the modified diagonal column index */
6302         p++;
6303       } while (p < mid && j[p] == col);
6304       Annz++;
6305     }
6306 
6307     for (p = mid; p < s;) {
6308       col = j[p];
6309       do {
6310         p++;
6311       } while (p < s && j[p] == col);
6312       Bnnz++;
6313     }
6314     k = s;
6315   }
6316 
6317   /* Allocation according to Atot, Btot, Annz, Bnnz */
6318   PetscCall(PetscMalloc1(Atot, &Aperm));
6319   PetscCall(PetscMalloc1(Btot, &Bperm));
6320   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6321   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6322 
6323   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6324   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6325   for (r = 0; r < m; r++) {
6326     k   = rowBegin[r];
6327     mid = rowMid[r];
6328     s   = rowEnd[r];
6329     PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
6330     PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
6331     Atot += mid - k;
6332     Btot += s - mid;
6333 
6334     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6335     for (p = k; p < mid;) {
6336       col = j[p];
6337       q   = p;
6338       do {
6339         p++;
6340       } while (p < mid && j[p] == col);
6341       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6342       Annz++;
6343     }
6344 
6345     for (p = mid; p < s;) {
6346       col = j[p];
6347       q   = p;
6348       do {
6349         p++;
6350       } while (p < s && j[p] == col);
6351       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6352       Bnnz++;
6353     }
6354   }
6355   /* Output */
6356   *Aperm_ = Aperm;
6357   *Annz_  = Annz;
6358   *Atot_  = Atot;
6359   *Ajmap_ = Ajmap;
6360   *Bperm_ = Bperm;
6361   *Bnnz_  = Bnnz;
6362   *Btot_  = Btot;
6363   *Bjmap_ = Bjmap;
6364   PetscFunctionReturn(PETSC_SUCCESS);
6365 }
6366 
6367 /*
6368   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6369 
6370   Input Parameters:
6371     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6372     nnz:  number of unique nonzeros in the merged matrix
6373     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6374     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6375 
6376   Output Parameter: (memory is allocated by the caller)
6377     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6378 
6379   Example:
6380     nnz1 = 4
6381     nnz  = 6
6382     imap = [1,3,4,5]
6383     jmap = [0,3,5,6,7]
6384    then,
6385     jmap_new = [0,0,3,3,5,6,7]
6386 */
6387 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6388 {
6389   PetscCount k, p;
6390 
6391   PetscFunctionBegin;
6392   jmap_new[0] = 0;
6393   p           = nnz;                /* p loops over jmap_new[] backwards */
6394   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6395     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6396   }
6397   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6398   PetscFunctionReturn(PETSC_SUCCESS);
6399 }
6400 
6401 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6402 {
6403   MPI_Comm    comm;
6404   PetscMPIInt rank, size;
6405   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6406   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6407   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6408 
6409   PetscFunctionBegin;
6410   PetscCall(PetscFree(mpiaij->garray));
6411   PetscCall(VecDestroy(&mpiaij->lvec));
6412 #if defined(PETSC_USE_CTABLE)
6413   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6414 #else
6415   PetscCall(PetscFree(mpiaij->colmap));
6416 #endif
6417   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6418   mat->assembled     = PETSC_FALSE;
6419   mat->was_assembled = PETSC_FALSE;
6420   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6421 
6422   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6423   PetscCallMPI(MPI_Comm_size(comm, &size));
6424   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6425   PetscCall(PetscLayoutSetUp(mat->rmap));
6426   PetscCall(PetscLayoutSetUp(mat->cmap));
6427   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6428   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6429   PetscCall(MatGetLocalSize(mat, &m, &n));
6430   PetscCall(MatGetSize(mat, &M, &N));
6431 
6432   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6433   /* entries come first, then local rows, then remote rows.                     */
6434   PetscCount n1 = coo_n, *perm1;
6435   PetscInt  *i1 = coo_i, *j1 = coo_j;
6436 
6437   PetscCall(PetscMalloc1(n1, &perm1));
6438   for (k = 0; k < n1; k++) perm1[k] = k;
6439 
6440   /* Manipulate indices so that entries with negative row or col indices will have smallest
6441      row indices, local entries will have greater but negative row indices, and remote entries
6442      will have positive row indices.
6443   */
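  /* An illustrative (hypothetical) case: with rstart=10 and rend=20, an entry in local row 12 becomes 12 - PETSC_MAX_INT (negative but
     greater than PETSC_MIN_INT), an entry with a negative row or column index becomes PETSC_MIN_INT, and an entry in remote row 25 keeps
     its positive index; the sort below therefore groups ignored entries first, then local rows, then remote rows. */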
6444   for (k = 0; k < n1; k++) {
6445     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6446     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6447     else {
6448       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but entries are being inserted into remote rows");
6449       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6450     }
6451   }
6452 
6453   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6454   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6455   for (k = 0; k < n1; k++) {
6456     if (i1[k] > PETSC_MIN_INT) break;
6457   }                                                                               /* Advance k to the first entry we need to take care of */
6458   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6459   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6460 
6461   /*           Split local rows into diag/offdiag portions                      */
6462   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6463   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6464   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6465 
6466   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6467   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6468   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6469 
6470   /*           Send remote rows to their owner                                  */
6471   /* Find which rows should be sent to which remote ranks*/
6472   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6473   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6474   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6475   const PetscInt *ranges;
6476   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6477 
6478   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6479   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6480   for (k = rem; k < n1;) {
6481     PetscMPIInt owner;
6482     PetscInt    firstRow, lastRow;
6483 
6484     /* Locate a row range */
6485     firstRow = i1[k]; /* first row of this owner */
6486     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6487     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6488 
6489     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6490     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6491 
6492     /* All entries in [k,p) belong to this remote owner */
6493     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6494       PetscMPIInt *sendto2;
6495       PetscInt    *nentries2;
6496       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6497 
6498       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6499       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6500       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6501       PetscCall(PetscFree2(sendto, nentries));
6502       sendto   = sendto2;
6503       nentries = nentries2;
6504       maxNsend = maxNsend2;
6505     }
6506     sendto[nsend]   = owner;
6507     PetscCall(PetscCountCast(p - k, &nentries[nsend])); /* p - k is a PetscCount; error if it does not fit in PetscInt */
6509     nsend++;
6510     k = p;
6511   }
6512 
6513   /* Build 1st SF to know offsets on remote to send data */
6514   PetscSF      sf1;
6515   PetscInt     nroots = 1, nroots2 = 0;
6516   PetscInt     nleaves = nsend, nleaves2 = 0;
6517   PetscInt    *offsets;
6518   PetscSFNode *iremote;
6519 
6520   PetscCall(PetscSFCreate(comm, &sf1));
6521   PetscCall(PetscMalloc1(nsend, &iremote));
6522   PetscCall(PetscMalloc1(nsend, &offsets));
6523   for (k = 0; k < nsend; k++) {
6524     iremote[k].rank  = sendto[k];
6525     iremote[k].index = 0;
6526     nleaves2 += nentries[k];
6527     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6528   }
6529   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6530   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6531   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* If nroots2 overflowed, the offsets[] check below would catch it */
6532   PetscCall(PetscSFDestroy(&sf1));
6533   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6534 
6535   /* Build 2nd SF to send remote COOs to their owner */
6536   PetscSF sf2;
6537   nroots  = nroots2;
6538   nleaves = nleaves2;
6539   PetscCall(PetscSFCreate(comm, &sf2));
6540   PetscCall(PetscSFSetFromOptions(sf2));
6541   PetscCall(PetscMalloc1(nleaves, &iremote));
6542   p = 0;
6543   for (k = 0; k < nsend; k++) {
6544     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6545     for (q = 0; q < nentries[k]; q++, p++) {
6546       iremote[p].rank  = sendto[k];
6547       iremote[p].index = offsets[k] + q;
6548     }
6549   }
6550   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6551 
6552   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6553   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6554 
6555   /* Send the remote COOs to their owner */
6556   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6557   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6558   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6559   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6560   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6561   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6562   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6563 
6564   PetscCall(PetscFree(offsets));
6565   PetscCall(PetscFree2(sendto, nentries));
6566 
6567   /* Sort received COOs by row along with the permutation array     */
6568   for (k = 0; k < n2; k++) perm2[k] = k;
6569   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6570 
6571   /* Split received COOs into diag/offdiag portions                 */
6572   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6573   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6574   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6575 
6576   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6577   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6578 
6579   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6580   PetscInt *Ai, *Bi;
6581   PetscInt *Aj, *Bj;
6582 
6583   PetscCall(PetscMalloc1(m + 1, &Ai));
6584   PetscCall(PetscMalloc1(m + 1, &Bi));
6585   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6586   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6587 
6588   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6589   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6590   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6591   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6592   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6593 
6594   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6595   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6596 
6597   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6598   /* expect nonzeros in A/B most likely have local contributing entries        */
6599   PetscInt    Annz = Ai[m];
6600   PetscInt    Bnnz = Bi[m];
6601   PetscCount *Ajmap1_new, *Bjmap1_new;
6602 
6603   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6604   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6605 
6606   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6607   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6608 
6609   PetscCall(PetscFree(Aimap1));
6610   PetscCall(PetscFree(Ajmap1));
6611   PetscCall(PetscFree(Bimap1));
6612   PetscCall(PetscFree(Bjmap1));
6613   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6614   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6615   PetscCall(PetscFree(perm1));
6616   PetscCall(PetscFree3(i2, j2, perm2));
6617 
6618   Ajmap1 = Ajmap1_new;
6619   Bjmap1 = Bjmap1_new;
6620 
6621   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6622   if (Annz < Annz1 + Annz2) {
6623     PetscInt *Aj_new;
6624     PetscCall(PetscMalloc1(Annz, &Aj_new));
6625     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6626     PetscCall(PetscFree(Aj));
6627     Aj = Aj_new;
6628   }
6629 
6630   if (Bnnz < Bnnz1 + Bnnz2) {
6631     PetscInt *Bj_new;
6632     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6633     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6634     PetscCall(PetscFree(Bj));
6635     Bj = Bj_new;
6636   }
6637 
6638   /* Create new submatrices for on-process and off-process coupling                  */
6639   PetscScalar *Aa, *Ba;
6640   MatType      rtype;
6641   Mat_SeqAIJ  *a, *b;
6642   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6643   PetscCall(PetscCalloc1(Bnnz, &Ba));
6644   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6645   if (cstart) {
6646     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6647   }
6648   PetscCall(MatDestroy(&mpiaij->A));
6649   PetscCall(MatDestroy(&mpiaij->B));
6650   PetscCall(MatGetRootType_Private(mat, &rtype));
6651   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6652   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6653   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6654 
6655   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6656   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6657   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6658   a->free_a = b->free_a = PETSC_TRUE;
6659   a->free_ij = b->free_ij = PETSC_TRUE;
6660 
6661   /* conversion must happen AFTER multiply setup */
6662   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6663   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6664   PetscCall(VecDestroy(&mpiaij->lvec));
6665   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6666 
6667   mpiaij->coo_n   = coo_n;
6668   mpiaij->coo_sf  = sf2;
6669   mpiaij->sendlen = nleaves;
6670   mpiaij->recvlen = nroots;
6671 
6672   mpiaij->Annz = Annz;
6673   mpiaij->Bnnz = Bnnz;
6674 
6675   mpiaij->Annz2 = Annz2;
6676   mpiaij->Bnnz2 = Bnnz2;
6677 
6678   mpiaij->Atot1 = Atot1;
6679   mpiaij->Atot2 = Atot2;
6680   mpiaij->Btot1 = Btot1;
6681   mpiaij->Btot2 = Btot2;
6682 
6683   mpiaij->Ajmap1 = Ajmap1;
6684   mpiaij->Aperm1 = Aperm1;
6685 
6686   mpiaij->Bjmap1 = Bjmap1;
6687   mpiaij->Bperm1 = Bperm1;
6688 
6689   mpiaij->Aimap2 = Aimap2;
6690   mpiaij->Ajmap2 = Ajmap2;
6691   mpiaij->Aperm2 = Aperm2;
6692 
6693   mpiaij->Bimap2 = Bimap2;
6694   mpiaij->Bjmap2 = Bjmap2;
6695   mpiaij->Bperm2 = Bperm2;
6696 
6697   mpiaij->Cperm1 = Cperm1;
6698 
6699   /* Allocate in preallocation. If not used, it has zero cost on host */
6700   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6701   PetscFunctionReturn(PETSC_SUCCESS);
6702 }
6703 
6704 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6705 {
6706   Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
6707   Mat               A = mpiaij->A, B = mpiaij->B;
6708   PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
6709   PetscScalar      *Aa, *Ba;
6710   PetscScalar      *sendbuf = mpiaij->sendbuf;
6711   PetscScalar      *recvbuf = mpiaij->recvbuf;
6712   const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
6713   const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
6714   const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
6715   const PetscCount *Cperm1 = mpiaij->Cperm1;
6716 
6717   PetscFunctionBegin;
6718   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6719   PetscCall(MatSeqAIJGetArray(B, &Ba));
6720 
6721   /* Pack entries to be sent to remote */
6722   for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6723 
6724   /* Send remote entries to their owner and overlap the communication with local computation */
6725   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6726   /* Add local entries to A and B */
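  /* A hypothetical illustration of the mapping built in MatSetPreallocationCOO_MPIAIJ(): if the i-th nonzero of A was contributed by three
     local COO entries, then Ajmap1[i+1] - Ajmap1[i] == 3 and v[Aperm1[Ajmap1[i]]], ..., v[Aperm1[Ajmap1[i+1]-1]] are the values summed into Aa[i] */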
6727   for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6728     PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
6729     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6730     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6731   }
6732   for (PetscCount i = 0; i < Bnnz; i++) {
6733     PetscScalar sum = 0.0;
6734     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6735     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6736   }
6737   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6738 
6739   /* Add received remote entries to A and B */
6740   for (PetscCount i = 0; i < Annz2; i++) {
6741     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6742   }
6743   for (PetscCount i = 0; i < Bnnz2; i++) {
6744     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6745   }
6746   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6747   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6748   PetscFunctionReturn(PETSC_SUCCESS);
6749 }
6750 
6751 /*MC
6752    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6753 
6754    Options Database Keys:
6755 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6756 
6757    Level: beginner
6758 
6759    Notes:
6760    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6761     in which case the values associated with the rows and columns one passes in are set to zero
6762     in the matrix.
6763 
6764     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6765     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
6766 
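   Example Usage:
   A minimal sketch of the `NULL`-values behavior described above; `comm` is assumed to be an MPI communicator and error checking
   with `PetscCall()` is omitted for brevity.
.vb
   Mat      A;
   PetscInt row = 0, col = 0;

   MatCreate(comm, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 10, 10);
   MatSetType(A, MATMPIAIJ);
   MatMPIAIJSetPreallocation(A, 1, NULL, 1, NULL);
   MatSetValues(A, 1, &row, 1, &col, NULL, INSERT_VALUES); /* NULL values: stores an explicit zero at (0,0) */
   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
   MatDestroy(&A);
.ve
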
6767 .seealso: [](chapter_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6768 M*/
6769 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6770 {
6771   Mat_MPIAIJ *b;
6772   PetscMPIInt size;
6773 
6774   PetscFunctionBegin;
6775   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6776 
6777   PetscCall(PetscNew(&b));
6778   B->data = (void *)b;
6779   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
6780   B->assembled  = PETSC_FALSE;
6781   B->insertmode = NOT_SET_VALUES;
6782   b->size       = size;
6783 
6784   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6785 
6786   /* build cache for off array entries formed */
6787   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6788 
6789   b->donotstash  = PETSC_FALSE;
6790   b->colmap      = NULL;
6791   b->garray      = NULL;
6792   b->roworiented = PETSC_TRUE;
6793 
6794   /* stuff used for matrix vector multiply */
6795   b->lvec  = NULL;
6796   b->Mvctx = NULL;
6797 
6798   /* stuff for MatGetRow() */
6799   b->rowindices   = NULL;
6800   b->rowvalues    = NULL;
6801   b->getrowactive = PETSC_FALSE;
6802 
6803   /* flexible pointer used in CUSPARSE classes */
6804   b->spptr = NULL;
6805 
6806   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6807   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6808   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6809   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6810   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6811   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6812   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6813   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6814   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6815   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6816 #if defined(PETSC_HAVE_CUDA)
6817   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6818 #endif
6819 #if defined(PETSC_HAVE_HIP)
6820   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6821 #endif
6822 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6823   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6824 #endif
6825 #if defined(PETSC_HAVE_MKL_SPARSE)
6826   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6827 #endif
6828   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6829   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6830   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6831   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6832 #if defined(PETSC_HAVE_ELEMENTAL)
6833   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6834 #endif
6835 #if defined(PETSC_HAVE_SCALAPACK)
6836   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6837 #endif
6838   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6839   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6840 #if defined(PETSC_HAVE_HYPRE)
6841   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6842   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6843 #endif
6844   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6845   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6846   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6847   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6848   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6849   PetscFunctionReturn(PETSC_SUCCESS);
6850 }
6851 
6852 /*@C
6853      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6854          and "off-diagonal" part of the matrix in CSR format.
6855 
6856    Collective
6857 
6858    Input Parameters:
6859 +  comm - MPI communicator
6860 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6861 .  n - This value should be the same as the local size used in creating the
6862        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
6863        calculated if `N` is given). For square matrices `n` is almost always `m`.
6864 .  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6865 .  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6866 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6867 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6868 .   a - matrix values
6869 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6870 .   oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6871 -   oa - matrix values
6872 
6873    Output Parameter:
6874 .   mat - the matrix
6875 
6876    Level: advanced
6877 
6878    Notes:
6879        The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6880        must free the arrays once the matrix has been destroyed and not before.
6881 
6882        The `i` and `j` indices are 0 based
6883 
6884        See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6885 
6886        This sets local rows and cannot be used to set off-processor values.
6887 
6888        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6889        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6890        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6891        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6892        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6893        communication if it is known that only local entries will be set.
6894 
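       A small illustration with hypothetical data (0-based indices): suppose a rank owns two rows and the "diagonal" block covers
       global columns {0,1} of its local 2 x 4 slice

.vb
          row 0:  1  0 | 2  0
          row 1:  0  3 | 0  4
.ve

       Then the diagonal CSR arrays are i = {0,1,2}, j = {0,1} (local column indices), a = {1,3}, and the off-diagonal CSR arrays
       are oi = {0,1,2}, oj = {2,3} (global column indices), oa = {2,4}.
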
6895 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6896           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6897 @*/
6898 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6899 {
6900   Mat_MPIAIJ *maij;
6901 
6902   PetscFunctionBegin;
6903   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6904   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6905   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6906   PetscCall(MatCreate(comm, mat));
6907   PetscCall(MatSetSizes(*mat, m, n, M, N));
6908   PetscCall(MatSetType(*mat, MATMPIAIJ));
6909   maij = (Mat_MPIAIJ *)(*mat)->data;
6910 
6911   (*mat)->preallocated = PETSC_TRUE;
6912 
6913   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6914   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6915 
6916   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6917   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6918 
6919   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6920   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6921   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6922   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6923   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6924   PetscFunctionReturn(PETSC_SUCCESS);
6925 }
6926 
6927 typedef struct {
6928   Mat       *mp;    /* intermediate products */
6929   PetscBool *mptmp; /* is the intermediate product temporary ? */
6930   PetscInt   cp;    /* number of intermediate products */
6931 
6932   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6933   PetscInt    *startsj_s, *startsj_r;
6934   PetscScalar *bufa;
6935   Mat          P_oth;
6936 
6937   /* may take advantage of merging product->B */
6938   Mat Bloc; /* B-local by merging diag and off-diag */
6939 
6940   /* cusparse does not support splitting the symbolic and numeric phases.
6941      When api_user is true, we don't need to update the numerical values
6942      of the temporary storage */
6943   PetscBool reusesym;
6944 
6945   /* support for COO values insertion */
6946   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6947   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
6948   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
6949   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6950   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
6951   PetscMemType mtype;
6952 
6953   /* customization */
6954   PetscBool abmerge;
6955   PetscBool P_oth_bind;
6956 } MatMatMPIAIJBACKEND;
6957 
6958 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6959 {
6960   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6961   PetscInt             i;
6962 
6963   PetscFunctionBegin;
6964   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6965   PetscCall(PetscFree(mmdata->bufa));
6966   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6967   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6968   PetscCall(MatDestroy(&mmdata->P_oth));
6969   PetscCall(MatDestroy(&mmdata->Bloc));
6970   PetscCall(PetscSFDestroy(&mmdata->sf));
6971   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6972   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6973   PetscCall(PetscFree(mmdata->own[0]));
6974   PetscCall(PetscFree(mmdata->own));
6975   PetscCall(PetscFree(mmdata->off[0]));
6976   PetscCall(PetscFree(mmdata->off));
6977   PetscCall(PetscFree(mmdata));
6978   PetscFunctionReturn(PETSC_SUCCESS);
6979 }
6980 
6981 /* Copy selected n entries with indices in idx[] of A to v[].
6982    If idx is NULL, copy the whole data array of A to v[]
6983  */
6984 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6985 {
6986   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
6987 
6988   PetscFunctionBegin;
6989   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
6990   if (f) {
6991     PetscCall((*f)(A, n, idx, v));
6992   } else {
6993     const PetscScalar *vv;
6994 
6995     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
6996     if (n && idx) {
6997       PetscScalar    *w  = v;
6998       const PetscInt *oi = idx;
6999       PetscInt        j;
7000 
7001       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7002     } else {
7003       PetscCall(PetscArraycpy(v, vv, n));
7004     }
7005     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7006   }
7007   PetscFunctionReturn(PETSC_SUCCESS);
7008 }
7009 
7010 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7011 {
7012   MatMatMPIAIJBACKEND *mmdata;
7013   PetscInt             i, n_d, n_o;
7014 
7015   PetscFunctionBegin;
7016   MatCheckProduct(C, 1);
7017   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7018   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7019   if (!mmdata->reusesym) { /* update temporary matrices */
7020     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7021     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7022   }
7023   mmdata->reusesym = PETSC_FALSE;
7024 
7025   for (i = 0; i < mmdata->cp; i++) {
7026     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7027     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7028   }
7029   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7030     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7031 
7032     if (mmdata->mptmp[i]) continue;
7033     if (noff) {
7034       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7035 
7036       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7037       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7038       n_o += noff;
7039       n_d += nown;
7040     } else {
7041       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7042 
7043       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7044       n_d += mm->nz;
7045     }
7046   }
7047   if (mmdata->hasoffproc) { /* offprocess insertion */
7048     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7049     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7050   }
7051   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7052   PetscFunctionReturn(PETSC_SUCCESS);
7053 }
7054 
7055 /* Support for Pt * A, A * P, or Pt * A * P */
7056 #define MAX_NUMBER_INTERMEDIATE 4
7057 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7058 {
7059   Mat_Product           *product = C->product;
7060   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7061   Mat_MPIAIJ            *a, *p;
7062   MatMatMPIAIJBACKEND   *mmdata;
7063   ISLocalToGlobalMapping P_oth_l2g = NULL;
7064   IS                     glob      = NULL;
7065   const char            *prefix;
7066   char                   pprefix[256];
7067   const PetscInt        *globidx, *P_oth_idx;
7068   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7069   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7070   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7071                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7072                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7073   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
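  /* For instance (as set later in this routine): the A_diag * P_diag product maps to C's owned rows/cols by a constant offset, so
     rmapt[] = cmapt[] = 1 there, while A_off * P_oth has columns indexed through a local-to-global table, so cmapt[] = 2 with
     cmapa[] pointing at that table */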
7074 
7075   MatProductType ptype;
7076   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7077   PetscMPIInt    size;
7078 
7079   PetscFunctionBegin;
7080   MatCheckProduct(C, 1);
7081   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7082   ptype = product->type;
7083   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7084     ptype                                          = MATPRODUCT_AB;
7085     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7086   }
7087   switch (ptype) {
7088   case MATPRODUCT_AB:
7089     A          = product->A;
7090     P          = product->B;
7091     m          = A->rmap->n;
7092     n          = P->cmap->n;
7093     M          = A->rmap->N;
7094     N          = P->cmap->N;
7095     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7096     break;
7097   case MATPRODUCT_AtB:
7098     P          = product->A;
7099     A          = product->B;
7100     m          = P->cmap->n;
7101     n          = A->cmap->n;
7102     M          = P->cmap->N;
7103     N          = A->cmap->N;
7104     hasoffproc = PETSC_TRUE;
7105     break;
7106   case MATPRODUCT_PtAP:
7107     A          = product->A;
7108     P          = product->B;
7109     m          = P->cmap->n;
7110     n          = P->cmap->n;
7111     M          = P->cmap->N;
7112     N          = P->cmap->N;
7113     hasoffproc = PETSC_TRUE;
7114     break;
7115   default:
7116     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7117   }
7118   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7119   if (size == 1) hasoffproc = PETSC_FALSE;
7120 
7121   /* defaults */
7122   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7123     mp[i]    = NULL;
7124     mptmp[i] = PETSC_FALSE;
7125     rmapt[i] = -1;
7126     cmapt[i] = -1;
7127     rmapa[i] = NULL;
7128     cmapa[i] = NULL;
7129   }
7130 
7131   /* customization */
7132   PetscCall(PetscNew(&mmdata));
7133   mmdata->reusesym = product->api_user;
7134   if (ptype == MATPRODUCT_AB) {
7135     if (product->api_user) {
7136       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7137       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7138       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7139       PetscOptionsEnd();
7140     } else {
7141       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7142       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7143       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7144       PetscOptionsEnd();
7145     }
7146   } else if (ptype == MATPRODUCT_PtAP) {
7147     if (product->api_user) {
7148       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7149       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7150       PetscOptionsEnd();
7151     } else {
7152       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7153       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7154       PetscOptionsEnd();
7155     }
7156   }
7157   a = (Mat_MPIAIJ *)A->data;
7158   p = (Mat_MPIAIJ *)P->data;
7159   PetscCall(MatSetSizes(C, m, n, M, N));
7160   PetscCall(PetscLayoutSetUp(C->rmap));
7161   PetscCall(PetscLayoutSetUp(C->cmap));
7162   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7163   PetscCall(MatGetOptionsPrefix(C, &prefix));
7164 
7165   cp = 0;
7166   switch (ptype) {
7167   case MATPRODUCT_AB: /* A * P */
7168     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7169 
7170     /* A_diag * P_local (merged or not) */
7171     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7172       /* P is product->B */
7173       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7174       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7175       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7176       PetscCall(MatProductSetFill(mp[cp], product->fill));
7177       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7178       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7179       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7180       mp[cp]->product->api_user = product->api_user;
7181       PetscCall(MatProductSetFromOptions(mp[cp]));
7182       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7183       PetscCall(ISGetIndices(glob, &globidx));
7184       rmapt[cp] = 1;
7185       cmapt[cp] = 2;
7186       cmapa[cp] = globidx;
7187       mptmp[cp] = PETSC_FALSE;
7188       cp++;
7189     } else { /* A_diag * P_diag and A_diag * P_off */
7190       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7191       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7192       PetscCall(MatProductSetFill(mp[cp], product->fill));
7193       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7194       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7195       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7196       mp[cp]->product->api_user = product->api_user;
7197       PetscCall(MatProductSetFromOptions(mp[cp]));
7198       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7199       rmapt[cp] = 1;
7200       cmapt[cp] = 1;
7201       mptmp[cp] = PETSC_FALSE;
7202       cp++;
7203       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7204       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7205       PetscCall(MatProductSetFill(mp[cp], product->fill));
7206       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7207       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7208       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7209       mp[cp]->product->api_user = product->api_user;
7210       PetscCall(MatProductSetFromOptions(mp[cp]));
7211       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7212       rmapt[cp] = 1;
7213       cmapt[cp] = 2;
7214       cmapa[cp] = p->garray;
7215       mptmp[cp] = PETSC_FALSE;
7216       cp++;
7217     }
7218 
7219     /* A_off * P_other */
7220     if (mmdata->P_oth) {
7221       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7222       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7223       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7224       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7225       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7226       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7227       PetscCall(MatProductSetFill(mp[cp], product->fill));
7228       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7229       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7230       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7231       mp[cp]->product->api_user = product->api_user;
7232       PetscCall(MatProductSetFromOptions(mp[cp]));
7233       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7234       rmapt[cp] = 1;
7235       cmapt[cp] = 2;
7236       cmapa[cp] = P_oth_idx;
7237       mptmp[cp] = PETSC_FALSE;
7238       cp++;
7239     }
7240     break;
7241 
7242   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7243     /* A is product->B */
7244     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7245     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7246       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7247       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7248       PetscCall(MatProductSetFill(mp[cp], product->fill));
7249       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7250       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7251       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7252       mp[cp]->product->api_user = product->api_user;
7253       PetscCall(MatProductSetFromOptions(mp[cp]));
7254       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7255       PetscCall(ISGetIndices(glob, &globidx));
7256       rmapt[cp] = 2;
7257       rmapa[cp] = globidx;
7258       cmapt[cp] = 2;
7259       cmapa[cp] = globidx;
7260       mptmp[cp] = PETSC_FALSE;
7261       cp++;
7262     } else {
7263       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7264       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7265       PetscCall(MatProductSetFill(mp[cp], product->fill));
7266       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7267       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7268       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7269       mp[cp]->product->api_user = product->api_user;
7270       PetscCall(MatProductSetFromOptions(mp[cp]));
7271       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7272       PetscCall(ISGetIndices(glob, &globidx));
7273       rmapt[cp] = 1;
7274       cmapt[cp] = 2;
7275       cmapa[cp] = globidx;
7276       mptmp[cp] = PETSC_FALSE;
7277       cp++;
7278       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7279       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7280       PetscCall(MatProductSetFill(mp[cp], product->fill));
7281       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7282       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7283       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7284       mp[cp]->product->api_user = product->api_user;
7285       PetscCall(MatProductSetFromOptions(mp[cp]));
7286       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7287       rmapt[cp] = 2;
7288       rmapa[cp] = p->garray;
7289       cmapt[cp] = 2;
7290       cmapa[cp] = globidx;
7291       mptmp[cp] = PETSC_FALSE;
7292       cp++;
7293     }
7294     break;
7295   case MATPRODUCT_PtAP:
7296     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7297     /* P is product->B */
7298     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7299     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7300     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7301     PetscCall(MatProductSetFill(mp[cp], product->fill));
7302     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7303     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7304     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7305     mp[cp]->product->api_user = product->api_user;
7306     PetscCall(MatProductSetFromOptions(mp[cp]));
7307     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7308     PetscCall(ISGetIndices(glob, &globidx));
7309     rmapt[cp] = 2;
7310     rmapa[cp] = globidx;
7311     cmapt[cp] = 2;
7312     cmapa[cp] = globidx;
7313     mptmp[cp] = PETSC_FALSE;
7314     cp++;
7315     if (mmdata->P_oth) {
7316       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7317       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7318       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7319       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7320       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7321       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7322       PetscCall(MatProductSetFill(mp[cp], product->fill));
7323       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7324       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7325       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7326       mp[cp]->product->api_user = product->api_user;
7327       PetscCall(MatProductSetFromOptions(mp[cp]));
7328       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7329       mptmp[cp] = PETSC_TRUE;
7330       cp++;
7331       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7332       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7333       PetscCall(MatProductSetFill(mp[cp], product->fill));
7334       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7335       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7336       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7337       mp[cp]->product->api_user = product->api_user;
7338       PetscCall(MatProductSetFromOptions(mp[cp]));
7339       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7340       rmapt[cp] = 2;
7341       rmapa[cp] = globidx;
7342       cmapt[cp] = 2;
7343       cmapa[cp] = P_oth_idx;
7344       mptmp[cp] = PETSC_FALSE;
7345       cp++;
7346     }
7347     break;
7348   default:
7349     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7350   }
7351   /* sanity check */
7352   if (size > 1)
7353     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7354 
7355   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7356   for (i = 0; i < cp; i++) {
7357     mmdata->mp[i]    = mp[i];
7358     mmdata->mptmp[i] = mptmp[i];
7359   }
7360   mmdata->cp             = cp;
7361   C->product->data       = mmdata;
7362   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7363   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7364 
7365   /* memory type */
7366   mmdata->mtype = PETSC_MEMTYPE_HOST;
7367   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7368   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7369   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7370   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7371   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7372   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7373 
7374   /* prepare coo coordinates for values insertion */
7375 
7376   /* count total nonzeros of those intermediate seqaij Mats
7377     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7378     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7379     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7380   */
7381   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7382     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7383     if (mptmp[cp]) continue;
7384     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7385       const PetscInt *rmap = rmapa[cp];
7386       const PetscInt  mr   = mp[cp]->rmap->n;
7387       const PetscInt  rs   = C->rmap->rstart;
7388       const PetscInt  re   = C->rmap->rend;
7389       const PetscInt *ii   = mm->i;
7390       for (i = 0; i < mr; i++) {
7391         const PetscInt gr = rmap[i];
7392         const PetscInt nz = ii[i + 1] - ii[i];
7393         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7394         else ncoo_oown += nz;                  /* this row is local */
7395       }
7396     } else ncoo_d += mm->nz;
7397   }
7398 
7399   /*
7400     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7401 
7402     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7403 
7404     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7405 
7406     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7407     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7408     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7409 
7410     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7411     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
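
    A hypothetical illustration: with two kept products mp[0] and mp[1], where mp[0] has 4 offproc and 3 locally owned nonzeros and
    mp[1] has 0 and 5 respectively, off[0..2] point at offsets {0,4,4} of the shared index array and own[0..2] at offsets {0,3,8}.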
7412   */
7413   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7414   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7415 
7416   /* gather (i,j) of nonzeros inserted by remote procs */
7417   if (hasoffproc) {
7418     PetscSF  msf;
7419     PetscInt ncoo2, *coo_i2, *coo_j2;
7420 
7421     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7422     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7423     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7424 
7425     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7426       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7427       PetscInt   *idxoff = mmdata->off[cp];
7428       PetscInt   *idxown = mmdata->own[cp];
7429       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7430         const PetscInt *rmap = rmapa[cp];
7431         const PetscInt *cmap = cmapa[cp];
7432         const PetscInt *ii   = mm->i;
7433         PetscInt       *coi  = coo_i + ncoo_o;
7434         PetscInt       *coj  = coo_j + ncoo_o;
7435         const PetscInt  mr   = mp[cp]->rmap->n;
7436         const PetscInt  rs   = C->rmap->rstart;
7437         const PetscInt  re   = C->rmap->rend;
7438         const PetscInt  cs   = C->cmap->rstart;
7439         for (i = 0; i < mr; i++) {
7440           const PetscInt *jj = mm->j + ii[i];
7441           const PetscInt  gr = rmap[i];
7442           const PetscInt  nz = ii[i + 1] - ii[i];
7443           if (gr < rs || gr >= re) { /* this is an offproc row */
7444             for (j = ii[i]; j < ii[i + 1]; j++) {
7445               *coi++    = gr;
7446               *idxoff++ = j;
7447             }
7448             if (!cmapt[cp]) { /* already global */
7449               for (j = 0; j < nz; j++) *coj++ = jj[j];
7450             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7451               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7452             } else { /* offdiag */
7453               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7454             }
7455             ncoo_o += nz;
7456           } else { /* this is a local row */
7457             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7458           }
7459         }
7460       }
7461       mmdata->off[cp + 1] = idxoff;
7462       mmdata->own[cp + 1] = idxown;
7463     }
7464 
7465     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7466     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7467     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7468     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7469     ncoo = ncoo_d + ncoo_oown + ncoo2;
7470     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7471     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7472     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7473     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7474     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7475     PetscCall(PetscFree2(coo_i, coo_j));
7476     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7477     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7478     coo_i = coo_i2;
7479     coo_j = coo_j2;
7480   } else { /* no offproc values insertion */
7481     ncoo = ncoo_d;
7482     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7483 
7484     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7485     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7486     PetscCall(PetscSFSetUp(mmdata->sf));
7487   }
7488   mmdata->hasoffproc = hasoffproc;
7489 
7490   /* gather (i,j) of nonzeros inserted locally */
7491   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7492     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7493     PetscInt       *coi  = coo_i + ncoo_d;
7494     PetscInt       *coj  = coo_j + ncoo_d;
7495     const PetscInt *jj   = mm->j;
7496     const PetscInt *ii   = mm->i;
7497     const PetscInt *cmap = cmapa[cp];
7498     const PetscInt *rmap = rmapa[cp];
7499     const PetscInt  mr   = mp[cp]->rmap->n;
7500     const PetscInt  rs   = C->rmap->rstart;
7501     const PetscInt  re   = C->rmap->rend;
7502     const PetscInt  cs   = C->cmap->rstart;
7503 
7504     if (mptmp[cp]) continue;
7505     if (rmapt[cp] == 1) { /* consecutive rows */
7506       /* fill coo_i */
7507       for (i = 0; i < mr; i++) {
7508         const PetscInt gr = i + rs;
7509         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7510       }
7511       /* fill coo_j */
7512       if (!cmapt[cp]) { /* type-0, already global */
7513         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7514       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7515         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7516       } else {                                            /* type-2, local to global for sparse columns */
7517         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7518       }
7519       ncoo_d += mm->nz;
7520     } else if (rmapt[cp] == 2) { /* sparse rows */
7521       for (i = 0; i < mr; i++) {
7522         const PetscInt *jj = mm->j + ii[i];
7523         const PetscInt  gr = rmap[i];
7524         const PetscInt  nz = ii[i + 1] - ii[i];
7525         if (gr >= rs && gr < re) { /* local rows */
7526           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7527           if (!cmapt[cp]) { /* type-0, already global */
7528             for (j = 0; j < nz; j++) *coj++ = jj[j];
7529           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7530             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7531           } else { /* type-2, local to global for sparse columns */
7532             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7533           }
7534           ncoo_d += nz;
7535         }
7536       }
7537     }
7538   }
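  /* Illustrative example (made-up values) of the three column-map types handled above: with a type-2 map
     cmap[] = {2, 7, 11}, local column index 1 maps to global column 7; with a type-1 map and cs = 100, local
     column 3 maps to global column 103; type-0 column indices are already global and are copied unchanged. */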
7539   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7540   PetscCall(ISDestroy(&glob));
7541   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7542   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7543   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7544   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7545 
7546   /* preallocate with COO data */
7547   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7548   PetscCall(PetscFree2(coo_i, coo_j));
7549   PetscFunctionReturn(PETSC_SUCCESS);
7550 }
7551 
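/* A minimal sketch (illustrative only, not taken from a PETSc example) of how the symbolic/numeric kernels above are
   reached through the MatProduct API, assuming A and B are assembled matrices of a device AIJ type handled by this
   backend:

     Mat C;
     PetscCall(MatProductCreate(A, B, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_AB));
     PetscCall(MatProductSetFromOptions(C)); // may select MatProductSymbolic_MPIAIJBACKEND (see below)
     PetscCall(MatProductSymbolic(C));       // builds the COO metadata assembled above
     PetscCall(MatProductNumeric(C));        // fills the values through MatProductNumeric_MPIAIJBACKEND
     PetscCall(MatDestroy(&C));
*/
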
7552 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7553 {
7554   Mat_Product *product = mat->product;
7555 #if defined(PETSC_HAVE_DEVICE)
7556   PetscBool match  = PETSC_FALSE;
7557   PetscBool usecpu = PETSC_FALSE;
7558 #else
7559   PetscBool match = PETSC_TRUE;
7560 #endif
7561 
7562   PetscFunctionBegin;
7563   MatCheckProduct(mat, 1);
7564 #if defined(PETSC_HAVE_DEVICE)
7565   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7566   if (match) { /* we can always fallback to the CPU if requested */
7567     switch (product->type) {
7568     case MATPRODUCT_AB:
7569       if (product->api_user) {
7570         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7571         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7572         PetscOptionsEnd();
7573       } else {
7574         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7575         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7576         PetscOptionsEnd();
7577       }
7578       break;
7579     case MATPRODUCT_AtB:
7580       if (product->api_user) {
7581         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7582         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7583         PetscOptionsEnd();
7584       } else {
7585         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7586         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7587         PetscOptionsEnd();
7588       }
7589       break;
7590     case MATPRODUCT_PtAP:
7591       if (product->api_user) {
7592         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7593         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7594         PetscOptionsEnd();
7595       } else {
7596         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7597         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7598         PetscOptionsEnd();
7599       }
7600       break;
7601     default:
7602       break;
7603     }
7604     match = (PetscBool)!usecpu;
7605   }
7606 #endif
7607   if (match) {
7608     switch (product->type) {
7609     case MATPRODUCT_AB:
7610     case MATPRODUCT_AtB:
7611     case MATPRODUCT_PtAP:
7612       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7613       break;
7614     default:
7615       break;
7616     }
7617   }
7618   /* fallback to MPIAIJ ops */
7619   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7620   PetscFunctionReturn(PETSC_SUCCESS);
7621 }
7622 
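/* Illustrative note: a user can still force the CPU implementation at run time through the options parsed above
   (the executable name is hypothetical):

     ./app -matmatmult_backend_cpu             # MatMatMult() callers
     ./app -mat_product_algorithm_backend_cpu  # MatProductSetType(C, MATPRODUCT_AB) callers

   in which case this routine falls back to MatProductSetFromOptions_MPIAIJ(). Analogous options exist for
   MATPRODUCT_AtB (-mattransposematmult_backend_cpu) and MATPRODUCT_PtAP (-matptap_backend_cpu). */
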
7623 /*
7624    Produces the set of block column indices of the matrix row, one for each block represented in the original row
7625 
7626    n - (output) the number of block indices in cc[]
7627    cc - (output) the block indices (must be large enough to contain the indices)
7628 */
7629 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7630 {
7631   PetscInt        cnt = -1, nidx, j;
7632   const PetscInt *idx;
7633 
7634   PetscFunctionBegin;
7635   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7636   if (nidx) {
7637     cnt     = 0;
7638     cc[cnt] = idx[0] / bs;
7639     for (j = 1; j < nidx; j++) {
7640       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7641     }
7642   }
7643   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7644   *n = cnt + 1;
7645   PetscFunctionReturn(PETSC_SUCCESS);
7646 }
7647 
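/* Worked example (made-up indices): for a row whose (sorted) column indices are {0, 1, 3, 4, 8} and bs = 2, the
   block indices are {0, 0, 1, 2, 4}, so MatCollapseRow() returns n = 4 and cc = {0, 1, 2, 4}; an empty row
   returns n = 0. */
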
7648 /*
7649     Produces the set of block column indices of the matrix block row, one for each block represented in the original set of bs rows
7650 
7651     ncollapsed - (output) the number of block indices
7652     collapsed - (output) the block indices; the work arrays w0, w1, w2 must each be large enough to contain the indices
7653 */
7654 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7655 {
7656   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7657 
7658   PetscFunctionBegin;
7659   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7660   for (i = start + 1; i < start + bs; i++) {
7661     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7662     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7663     cprevtmp = cprev;
7664     cprev    = merged;
7665     merged   = cprevtmp;
7666   }
7667   *ncollapsed = nprev;
7668   if (collapsed) *collapsed = cprev;
7669   PetscFunctionReturn(PETSC_SUCCESS);
7670 }
7671 
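/* Worked example (made-up indices): with bs = 2 and start = 4, the routine merges the block indices of point rows
   4 and 5; if MatCollapseRow() gives {0, 3} for row 4 and {1, 5} for row 5, the result is ncollapsed = 4 with
   collapsed = {0, 1, 3, 5}. */
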
7672 /*
7673    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7674 */
7675 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
7676 {
7677   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7678   Mat                tGmat;
7679   MPI_Comm           comm;
7680   const PetscScalar *vals;
7681   const PetscInt    *idx;
7682   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7683   MatScalar         *AA; // this is checked in graph
7684   PetscBool          isseqaij;
7685   Mat                a, b, c;
7686   MatType            jtype;
7687 
7688   PetscFunctionBegin;
7689   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
7690   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
7691   PetscCall(MatGetType(Gmat, &jtype));
7692   PetscCall(MatCreate(comm, &tGmat));
7693   PetscCall(MatSetType(tGmat, jtype));
7694 
7695   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7696                Also, if the matrix is symmetric, can we skip this
7697                operation? It can be very expensive on large matrices. */
7698 
7699   // global sizes
7700   PetscCall(MatGetSize(Gmat, &MM, &NN));
7701   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7702   nloc = Iend - Istart;
7703   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
7704   if (isseqaij) {
7705     a = Gmat;
7706     b = NULL;
7707   } else {
7708     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7709     a             = d->A;
7710     b             = d->B;
7711     garray        = d->garray;
7712   }
7713   /* Determine upper bound on non-zeros needed in new filtered matrix */
7714   for (PetscInt row = 0; row < nloc; row++) {
7715     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
7716     d_nnz[row] = ncols;
7717     if (ncols > maxcols) maxcols = ncols;
7718     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
7719   }
7720   if (b) {
7721     for (PetscInt row = 0; row < nloc; row++) {
7722       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
7723       o_nnz[row] = ncols;
7724       if (ncols > maxcols) maxcols = ncols;
7725       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
7726     }
7727   }
7728   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
7729   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7730   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
7731   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
7732   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7733   PetscCall(PetscFree2(d_nnz, o_nnz));
7734   //
7735   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
7736   nnz0 = nnz1 = 0;
7737   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7738     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7739       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
7740       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7741         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7742         if (PetscRealPart(sv) > vfilter) {
7743           nnz1++;
7744           PetscInt cid = idx[jj] + Istart; //diag
7745           if (c != a) cid = garray[idx[jj]];
7746           AA[ncol_row] = vals[jj];
7747           AJ[ncol_row] = cid;
7748           ncol_row++;
7749         }
7750       }
7751       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
7752       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
7753     }
7754   }
7755   PetscCall(PetscFree2(AA, AJ));
7756   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
7757   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7758   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7759 
7760   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7761 
7762   *filteredG = tGmat;
7763   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7764   PetscFunctionReturn(PETSC_SUCCESS);
7765 }
7766 
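/* A minimal usage sketch (illustrative only; G is assumed to be an assembled AIJ graph built elsewhere):

     Mat Gfilt = NULL;
     PetscCall(MatFilter_AIJ(G, 0.05, &Gfilt)); // keep only entries with |value| > 0.05
     PetscCall(MatDestroy(&G));
     G = Gfilt;

   This mirrors how MatCreateGraph_Simple_AIJ() below applies the filter when a nonnegative value is passed. */
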
7767 /*
7768  MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7769 
7770  Input Parameters:
7771 + Amat - matrix
7772 . symmetrize - make the result symmetric
7773 . scale - scale with diagonal
7774 - filter - drop entries whose absolute value does not exceed this threshold (pass a negative value to skip filtering)
7775 
7776  Output Parameter:
7777 . a_Gmat - output scalar graph (values >= 0)
7778 */
7779 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7780 {
7781   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7782   MPI_Comm  comm;
7783   Mat       Gmat;
7784   PetscBool ismpiaij, isseqaij;
7785   Mat       a, b, c;
7786   MatType   jtype;
7787 
7788   PetscFunctionBegin;
7789   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7790   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7791   PetscCall(MatGetSize(Amat, &MM, &NN));
7792   PetscCall(MatGetBlockSize(Amat, &bs));
7793   nloc = (Iend - Istart) / bs;
7794 
7795   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7796   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7797   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7798 
7799   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7800   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7801      implementation */
7802   if (bs > 1) {
7803     PetscCall(MatGetType(Amat, &jtype));
7804     PetscCall(MatCreate(comm, &Gmat));
7805     PetscCall(MatSetType(Gmat, jtype));
7806     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7807     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7808     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7809       PetscInt  *d_nnz, *o_nnz;
7810       MatScalar *aa, val, *AA;
7811       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7812       if (isseqaij) {
7813         a = Amat;
7814         b = NULL;
7815       } else {
7816         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7817         a             = d->A;
7818         b             = d->B;
7819       }
7820       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7821       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7822       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7823         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7824         const PetscInt *cols1, *cols2;
7825         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7826           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7827           nnz[brow / bs] = nc2 / bs;
7828           if (nc2 % bs) ok = 0;
7829           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7830           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7831             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7832             if (nc1 != nc2) ok = 0;
7833             else {
7834               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7835                 if (cols1[jj] != cols2[jj]) ok = 0;
7836                 if (cols1[jj] % bs != jj % bs) ok = 0;
7837               }
7838             }
7839             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7840           }
7841           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7842           if (!ok) {
7843             PetscCall(PetscFree2(d_nnz, o_nnz));
7844             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7845             goto old_bs;
7846           }
7847         }
7848       }
7849       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7850       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7851       PetscCall(PetscFree2(d_nnz, o_nnz));
7852       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7853       // diag
7854       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7855         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7856         ai               = aseq->i;
7857         n                = ai[brow + 1] - ai[brow];
7858         aj               = aseq->j + ai[brow];
7859         for (int k = 0; k < n; k += bs) {        // block columns
7860           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7861           val        = 0;
7862           for (int ii = 0; ii < bs; ii++) { // rows in block
7863             aa = aseq->a + ai[brow + ii] + k;
7864             for (int jj = 0; jj < bs; jj++) {         // columns in block
7865               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7866             }
7867           }
7868           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7869           AA[k / bs] = val;
7870         }
7871         grow = Istart / bs + brow / bs;
7872         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7873       }
7874       // off-diag
7875       if (ismpiaij) {
7876         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7877         const PetscScalar *vals;
7878         const PetscInt    *cols, *garray = aij->garray;
7879         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7880         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7881           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7882           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7883             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7884             AA[k / bs] = 0;
7885             AJ[cidx]   = garray[cols[k]] / bs;
7886           }
7887           nc = ncols / bs;
7888           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7889           for (int ii = 0; ii < bs; ii++) { // rows in block
7890             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7891             for (int k = 0; k < ncols; k += bs) {
7892               for (int jj = 0; jj < bs; jj++) { // cols in block
7893                 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7894                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7895               }
7896             }
7897             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7898           }
7899           grow = Istart / bs + brow / bs;
7900           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7901         }
7902       }
7903       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7904       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7905       PetscCall(PetscFree2(AA, AJ));
7906     } else {
7907       const PetscScalar *vals;
7908       const PetscInt    *idx;
7909       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7910     old_bs:
7911       /*
7912        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7913        */
7914       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7915       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7916       if (isseqaij) {
7917         PetscInt max_d_nnz;
7918         /*
7919          Determine exact preallocation count for (sequential) scalar matrix
7920          */
7921         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7922         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7923         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7924         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7925         PetscCall(PetscFree3(w0, w1, w2));
7926       } else if (ismpiaij) {
7927         Mat             Daij, Oaij;
7928         const PetscInt *garray;
7929         PetscInt        max_d_nnz;
7930         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7931         /*
7932          Determine exact preallocation count for diagonal block portion of scalar matrix
7933          */
7934         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7935         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7936         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7937         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7938         PetscCall(PetscFree3(w0, w1, w2));
7939         /*
7940          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7941          */
7942         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7943           o_nnz[jj] = 0;
7944           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7945             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7946             o_nnz[jj] += ncols;
7947             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7948           }
7949           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7950         }
7951       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7952       /* get scalar copy (norms) of matrix */
7953       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7954       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7955       PetscCall(PetscFree2(d_nnz, o_nnz));
7956       for (Ii = Istart; Ii < Iend; Ii++) {
7957         PetscInt dest_row = Ii / bs;
7958         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7959         for (jj = 0; jj < ncols; jj++) {
7960           PetscInt    dest_col = idx[jj] / bs;
7961           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7962           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7963         }
7964         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7965       }
7966       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7967       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7968     }
7969   } else {
7970     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7971     else {
7972       Gmat = Amat;
7973       PetscCall(PetscObjectReference((PetscObject)Gmat));
7974     }
7975     if (isseqaij) {
7976       a = Gmat;
7977       b = NULL;
7978     } else {
7979       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7980       a             = d->A;
7981       b             = d->B;
7982     }
7983     if (filter >= 0 || scale) {
7984       /* take absolute value of each entry */
7985       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7986         MatInfo      info;
7987         PetscScalar *avals;
7988         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7989         PetscCall(MatSeqAIJGetArray(c, &avals));
7990         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7991         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7992       }
7993     }
7994   }
7995   if (symmetrize) {
7996     PetscBool isset, issym;
7997     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7998     if (!isset || !issym) {
7999       Mat matTrans;
8000       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8001       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8002       PetscCall(MatDestroy(&matTrans));
8003     }
8004     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8005   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8006   if (scale) {
8007     /* symmetrically scale Gmat so that all diagonal values become +1 or -1 */
8008     Vec diag;
8009     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8010     PetscCall(MatGetDiagonal(Gmat, diag));
8011     PetscCall(VecReciprocal(diag));
8012     PetscCall(VecSqrtAbs(diag));
8013     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8014     PetscCall(VecDestroy(&diag));
8015   }
8016   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8017 
8018   if (filter >= 0) {
8019     Mat Fmat = NULL; /* some silly compiler needs this */
8020 
8021     PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
8022     PetscCall(MatDestroy(&Gmat));
8023     Gmat = Fmat;
8024   }
8025   *a_Gmat = Gmat;
8026   PetscFunctionReturn(PETSC_SUCCESS);
8027 }
8028 
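/* A minimal usage sketch (illustrative only; A is assumed to be an assembled (MPI)AIJ matrix, here with block size
   bs > 1 coming from a vector-valued PDE):

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, 0.01, &G)); // symmetrize, scale, drop small entries
     // ... use G, e.g. as the graph driving a coarsening algorithm ...
     PetscCall(MatDestroy(&G));

   Passing a negative filter value skips the filtering step entirely. */
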
8029 /*
8030     Special version for direct calls from Fortran
8031 */
8032 #include <petsc/private/fortranimpl.h>
8033 
8034 /* Change these macros so they can be used in a void function */
8035 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8036 #undef PetscCall
8037 #define PetscCall(...) \
8038   do { \
8039     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8040     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8041       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8042       return; \
8043     } \
8044   } while (0)
8045 
8046 #undef SETERRQ
8047 #define SETERRQ(comm, ierr, ...) \
8048   do { \
8049     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8050     return; \
8051   } while (0)
8052 
8053 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8054   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8055 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8056   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8057 #else
8058 #endif
8059 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8060 {
8061   Mat         mat = *mmat;
8062   PetscInt    m = *mm, n = *mn;
8063   InsertMode  addv = *maddv;
8064   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8065   PetscScalar value;
8066 
8067   MatCheckPreallocated(mat, 1);
8068   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8069   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8070   {
8071     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8072     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8073     PetscBool roworiented = aij->roworiented;
8074 
8075     /* Some Variables required in the macro */
8076     Mat         A     = aij->A;
8077     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8078     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8079     MatScalar  *aa;
8080     PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8081     Mat         B                 = aij->B;
8082     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8083     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8084     MatScalar  *ba;
8085     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8086      * cannot use "#if defined" inside a macro. */
8087     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8088 
8089     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8090     PetscInt   nonew = a->nonew;
8091     MatScalar *ap1, *ap2;
8092 
8093     PetscFunctionBegin;
8094     PetscCall(MatSeqAIJGetArray(A, &aa));
8095     PetscCall(MatSeqAIJGetArray(B, &ba));
8096     for (i = 0; i < m; i++) {
8097       if (im[i] < 0) continue;
8098       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8099       if (im[i] >= rstart && im[i] < rend) {
8100         row      = im[i] - rstart;
8101         lastcol1 = -1;
8102         rp1      = aj + ai[row];
8103         ap1      = aa + ai[row];
8104         rmax1    = aimax[row];
8105         nrow1    = ailen[row];
8106         low1     = 0;
8107         high1    = nrow1;
8108         lastcol2 = -1;
8109         rp2      = bj + bi[row];
8110         ap2      = ba + bi[row];
8111         rmax2    = bimax[row];
8112         nrow2    = bilen[row];
8113         low2     = 0;
8114         high2    = nrow2;
8115 
8116         for (j = 0; j < n; j++) {
8117           if (roworiented) value = v[i * n + j];
8118           else value = v[i + j * m];
8119           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8120           if (in[j] >= cstart && in[j] < cend) {
8121             col = in[j] - cstart;
8122             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8123           } else if (in[j] < 0) continue;
8124           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8125             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8126           } else {
8127             if (mat->was_assembled) {
8128               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8129 #if defined(PETSC_USE_CTABLE)
8130               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8131               col--;
8132 #else
8133               col = aij->colmap[in[j]] - 1;
8134 #endif
8135               if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
8136                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8137                 col = in[j];
8138                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8139                 B        = aij->B;
8140                 b        = (Mat_SeqAIJ *)B->data;
8141                 bimax    = b->imax;
8142                 bi       = b->i;
8143                 bilen    = b->ilen;
8144                 bj       = b->j;
8145                 rp2      = bj + bi[row];
8146                 ap2      = ba + bi[row];
8147                 rmax2    = bimax[row];
8148                 nrow2    = bilen[row];
8149                 low2     = 0;
8150                 high2    = nrow2;
8151                 bm       = aij->B->rmap->n;
8152                 ba       = b->a;
8153                 inserted = PETSC_FALSE;
8154               }
8155             } else col = in[j];
8156             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8157           }
8158         }
8159       } else if (!aij->donotstash) {
8160         if (roworiented) {
8161           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8162         } else {
8163           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8164         }
8165       }
8166     }
8167     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8168     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8169   }
8170   PetscFunctionReturnVoid();
8171 }
8172 
8173 /* Undefining these here since they were redefined from their original definition above! No
8174  * other PETSc functions should be defined past this point, as it is impossible to recover the
8175  * original definitions */
8176 #undef PetscCall
8177 #undef SETERRQ
8178