xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e91c04dfc8a52dee1965211bb1cc8e5bf775178f)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because those are what are used to determine
43     the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
137    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140    for communicators controlling multiple processes.  It is recommended that you call both of
141    the above preallocation routines for simplicity; a brief usage sketch follows this manual page.
142 
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
146   Level: beginner
147 
148   Developer Note:
149   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically
150   switches over to using inodes when enough exist.
151 
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
153 M*/
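
/*
  A minimal usage sketch (illustrative, not part of this source) of the recommendation above: call both
  preallocation routines so the same code works for one or many MPI processes. The matrix size n and the
  per-row nonzero estimates (5 diagonal, 3 off-diagonal) are assumptions chosen only for the example.

    Mat      A;
    PetscInt n = 100;

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n, n));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          // takes effect when the communicator has one process
    PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 3, NULL)); // takes effect when it has several
    // ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use the matrix, then MatDestroy(&A)
*/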
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity; a brief selection sketch follows this manual page.
163 
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
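
/*
  A brief sketch (illustrative, not part of this source) of selecting this type: either let the
  command-line option take effect through MatSetFromOptions(),

    PetscCall(MatSetFromOptions(A));   // then run with -mat_type aijcrl

  or convert an existing AIJ matrix in place:

    PetscCall(MatConvert(A, MATAIJCRL, MAT_INPLACE_MATRIX, &A));
*/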
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204 {
205   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
206   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
207   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
208   const PetscInt  *ia, *ib;
209   const MatScalar *aa, *bb, *aav, *bav;
210   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
211   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
212 
213   PetscFunctionBegin;
214   *keptrows = NULL;
215 
216   ia = a->i;
217   ib = b->i;
218   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
219   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
220   for (i = 0; i < m; i++) {
221     na = ia[i + 1] - ia[i];
222     nb = ib[i + 1] - ib[i];
223     if (!na && !nb) {
224       cnt++;
225       goto ok1;
226     }
227     aa = aav + ia[i];
228     for (j = 0; j < na; j++) {
229       if (aa[j] != 0.0) goto ok1;
230     }
231     bb = PetscSafePointerPlusOffset(bav, ib[i]);
232     for (j = 0; j < nb; j++) {
233       if (bb[j] != 0.0) goto ok1;
234     }
235     cnt++;
236   ok1:;
237   }
238   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239   if (!n0rows) {
240     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
241     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
242     PetscFunctionReturn(PETSC_SUCCESS);
243   }
244   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
245   cnt = 0;
246   for (i = 0; i < m; i++) {
247     na = ia[i + 1] - ia[i];
248     nb = ib[i + 1] - ib[i];
249     if (!na && !nb) continue;
250     aa = aav + ia[i];
251     for (j = 0; j < na; j++) {
252       if (aa[j] != 0.0) {
253         rows[cnt++] = rstart + i;
254         goto ok2;
255       }
256     }
257     bb = PetscSafePointerPlusOffset(bav, ib[i]);
258     for (j = 0; j < nb; j++) {
259       if (bb[j] != 0.0) {
260         rows[cnt++] = rstart + i;
261         goto ok2;
262       }
263     }
264   ok2:;
265   }
266   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
267   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
268   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302 {
303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304   PetscInt           i, m, n, *garray = aij->garray;
305   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
306   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
307   PetscReal         *work;
308   const PetscScalar *dummy;
309 
310   PetscFunctionBegin;
311   PetscCall(MatGetSize(A, &m, &n));
312   PetscCall(PetscCalloc1(n, &work));
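313   /* Touch the host arrays of A and B first so that any device-resident values are synchronized before a_aij->a and b_aij->a are read directly below (assumed intent of the Get/Restore pairs) */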
313   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
314   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
315   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
316   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
317   if (type == NORM_2) {
318     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
319     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
320   } else if (type == NORM_1) {
321     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
322     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
323   } else if (type == NORM_INFINITY) {
324     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
325     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
327     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
328     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
329   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
330     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
331     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
332   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
333   if (type == NORM_INFINITY) {
334     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
335   } else {
336     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
337   }
338   PetscCall(PetscFree(work));
339   if (type == NORM_2) {
340     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
341   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
342     for (i = 0; i < n; i++) reductions[i] /= m;
343   }
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
346 
347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
348 {
349   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
350   IS              sis, gis;
351   const PetscInt *isis, *igis;
352   PetscInt        n, *iis, nsis, ngis, rstart, i;
353 
354   PetscFunctionBegin;
355   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
356   PetscCall(MatFindNonzeroRows(a->B, &gis));
357   PetscCall(ISGetSize(gis, &ngis));
358   PetscCall(ISGetSize(sis, &nsis));
359   PetscCall(ISGetIndices(sis, &isis));
360   PetscCall(ISGetIndices(gis, &igis));
361 
362   PetscCall(PetscMalloc1(ngis + nsis, &iis));
363   PetscCall(PetscArraycpy(iis, igis, ngis));
364   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
365   n = ngis + nsis;
366   PetscCall(PetscSortRemoveDupsInt(&n, iis));
367   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
368   for (i = 0; i < n; i++) iis[i] += rstart;
369   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
370 
371   PetscCall(ISRestoreIndices(sis, &isis));
372   PetscCall(ISRestoreIndices(gis, &igis));
373   PetscCall(ISDestroy(&sis));
374   PetscCall(ISDestroy(&gis));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*
379   Local utility routine that creates a mapping from the global column
380 number to the local number in the off-diagonal part of the local
381 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
382 a slightly higher hash-table cost; without it, it is not scalable (each process
383 has an order-N integer array) but is fast to access.  A lookup sketch follows the routine.
384 */
385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
386 {
387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
388   PetscInt    n   = aij->B->cmap->n, i;
389 
390   PetscFunctionBegin;
391   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
392 #if defined(PETSC_USE_CTABLE)
393   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
394   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
395 #else
396   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
397   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
398 #endif
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
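
/*
  Lookup sketch for the colmap built above (it mirrors the use in MatSetValues_MPIAIJ() later in this file;
  gcol and lcol are illustrative names, with aij the Mat_MPIAIJ data of the matrix): the same +1/-1 shift
  used at insertion recovers the local off-diagonal column, and a result < 0 means gcol is not present in
  the off-diagonal block.

    PetscInt lcol;
  #if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
    lcol--;
  #else
    lcol = aij->colmap[gcol] - 1;
  #endif
*/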
401 
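/*
  The following two macros insert one (row, col, value) entry into the diagonal (A) respectively
  off-diagonal (B) SeqAIJ block: a short binary search narrows the column window of the row, an
  existing entry is overwritten or added to, and otherwise (unless new nonzeros are disallowed)
  the later entries of the row are shifted up to make room for the new entry.
*/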
402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
403   do { \
404     if (col <= lastcol1) low1 = 0; \
405     else high1 = nrow1; \
406     lastcol1 = col; \
407     while (high1 - low1 > 5) { \
408       t = (low1 + high1) / 2; \
409       if (rp1[t] > col) high1 = t; \
410       else low1 = t; \
411     } \
412     for (_i = low1; _i < high1; _i++) { \
413       if (rp1[_i] > col) break; \
414       if (rp1[_i] == col) { \
415         if (addv == ADD_VALUES) { \
416           ap1[_i] += value; \
417           /* Not sure whether PetscLogFlops() will slow down the code or not */ \
418           (void)PetscLogFlops(1.0); \
419         } else ap1[_i] = value; \
420         goto a_noinsert; \
421       } \
422     } \
423     if (value == 0.0 && ignorezeroentries && row != col) { \
424       low1  = 0; \
425       high1 = nrow1; \
426       goto a_noinsert; \
427     } \
428     if (nonew == 1) { \
429       low1  = 0; \
430       high1 = nrow1; \
431       goto a_noinsert; \
432     } \
433     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
434     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
435     N = nrow1++ - 1; \
436     a->nz++; \
437     high1++; \
438     /* shift up all the later entries in this row */ \
439     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
440     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
441     rp1[_i] = col; \
442     ap1[_i] = value; \
443   a_noinsert:; \
444     ailen[row] = nrow1; \
445   } while (0)
446 
447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
448   do { \
449     if (col <= lastcol2) low2 = 0; \
450     else high2 = nrow2; \
451     lastcol2 = col; \
452     while (high2 - low2 > 5) { \
453       t = (low2 + high2) / 2; \
454       if (rp2[t] > col) high2 = t; \
455       else low2 = t; \
456     } \
457     for (_i = low2; _i < high2; _i++) { \
458       if (rp2[_i] > col) break; \
459       if (rp2[_i] == col) { \
460         if (addv == ADD_VALUES) { \
461           ap2[_i] += value; \
462           (void)PetscLogFlops(1.0); \
463         } else ap2[_i] = value; \
464         goto b_noinsert; \
465       } \
466     } \
467     if (value == 0.0 && ignorezeroentries) { \
468       low2  = 0; \
469       high2 = nrow2; \
470       goto b_noinsert; \
471     } \
472     if (nonew == 1) { \
473       low2  = 0; \
474       high2 = nrow2; \
475       goto b_noinsert; \
476     } \
477     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
478     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
479     N = nrow2++ - 1; \
480     b->nz++; \
481     high2++; \
482     /* shift up all the later entries in this row */ \
483     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
484     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
485     rp2[_i] = col; \
486     ap2[_i] = value; \
487   b_noinsert:; \
488     bilen[row] = nrow2; \
489   } while (0)
490 
491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
492 {
493   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
494   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
495   PetscInt     l, *garray                         = mat->garray, diag;
496   PetscScalar *aa, *ba;
497 
498   PetscFunctionBegin;
499   /* code only works for square matrices A */
500 
501   /* find size of row to the left of the diagonal part */
502   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
503   row = row - diag;
504   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
505     if (garray[b->j[b->i[row] + l]] > diag) break;
506   }
507   if (l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512 
513   /* diagonal part */
514   if (a->i[row + 1] - a->i[row]) {
515     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
516     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
517     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
518   }
519 
520   /* right of diagonal part */
521   if (b->i[row + 1] - b->i[row] - l) {
522     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
523     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
524     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
525   }
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
530 {
531   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
532   PetscScalar value = 0.0;
533   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
534   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
535   PetscBool   roworiented = aij->roworiented;
536 
537   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
538   Mat         A     = aij->A;
539   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
540   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
541   PetscBool   ignorezeroentries = a->ignorezeroentries;
542   Mat         B                 = aij->B;
543   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
544   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
545   MatScalar  *aa, *ba;
546   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
547   PetscInt    nonew;
548   MatScalar  *ap1, *ap2;
549 
550   PetscFunctionBegin;
551   PetscCall(MatSeqAIJGetArray(A, &aa));
552   PetscCall(MatSeqAIJGetArray(B, &ba));
553   for (i = 0; i < m; i++) {
554     if (im[i] < 0) continue;
555     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
560       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
567       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j = 0; j < n; j++) {
574         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
575         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
580         } else if (in[j] < 0) {
581           continue;
582         } else {
583           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
584           if (mat->was_assembled) {
585             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
586 #if defined(PETSC_USE_CTABLE)
587             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
593               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
594               col = in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ *)B->data;
598               bimax = b->imax;
599               bi    = b->i;
600               bilen = b->ilen;
601               bj    = b->j;
602               ba    = b->a;
603               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
604               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
612               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
613                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
614               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
615             }
616           } else col = in[j];
617           nonew = b->nonew;
618           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
619         }
620       }
621     } else {
622       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
623       if (!aij->donotstash) {
624         mat->assembled = PETSC_FALSE;
625         if (roworiented) {
626           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
627         } else {
628           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
629         }
630       }
631     }
632   }
633   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
634   PetscCall(MatSeqAIJRestoreArray(B, &ba));
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
637 
638 /*
639     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
640     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
641     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. A small worked example follows.
642 */
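/*
  A small worked example (illustrative values) of the split performed below: with cstart = 10, cend = 20
  and the local CSR input

    mat_i = {0, 3, 5}
    mat_j = {2, 11, 25,   12, 30}

  row 0 has one diagonal-block column (global 11, stored as local 1) and two off-diagonal columns (2, 25),
  and row 1 has one of each; so afterwards ailen = {1, 1} and bilen = {2, 1}, with the off-diagonal j
  entries still holding global column indices.
*/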
643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
644 {
645   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
646   Mat         A      = aij->A; /* diagonal part of the matrix */
647   Mat         B      = aij->B; /* off-diagonal part of the matrix */
648   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
649   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
650   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
651   PetscInt   *ailen = a->ilen, *aj = a->j;
652   PetscInt   *bilen = b->ilen, *bj = b->j;
653   PetscInt    am          = aij->A->rmap->n, j;
654   PetscInt    diag_so_far = 0, dnz;
655   PetscInt    offd_so_far = 0, onz;
656 
657   PetscFunctionBegin;
658   /* Iterate over all rows of the matrix */
659   for (j = 0; j < am; j++) {
660     dnz = onz = 0;
661     /*  Iterate over all non-zero columns of the current row */
662     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
663       /* If column is in the diagonal */
664       if (mat_j[col] >= cstart && mat_j[col] < cend) {
665         aj[diag_so_far++] = mat_j[col] - cstart;
666         dnz++;
667       } else { /* off-diagonal entries */
668         bj[offd_so_far++] = mat_j[col];
669         onz++;
670       }
671     }
672     ailen[j] = dnz;
673     bilen[j] = onz;
674   }
675   PetscFunctionReturn(PETSC_SUCCESS);
676 }
677 
678 /*
679     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
680     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
681     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
682     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
683     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
684 */
685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
686 {
687   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
688   Mat          A    = aij->A; /* diagonal part of the matrix */
689   Mat          B    = aij->B; /* off-diagonal part of the matrix */
690   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
691   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
692   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
693   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
694   PetscInt    *ailen = a->ilen, *aj = a->j;
695   PetscInt    *bilen = b->ilen, *bj = b->j;
696   PetscInt     am          = aij->A->rmap->n, j;
697   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
698   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
699   PetscScalar *aa = a->a, *ba = b->a;
700 
701   PetscFunctionBegin;
702   /* Iterate over all rows of the matrix */
703   for (j = 0; j < am; j++) {
704     dnz_row = onz_row = 0;
705     rowstart_offd     = full_offd_i[j];
706     rowstart_diag     = full_diag_i[j];
707     /*  Iterate over all non-zero columns of the current row */
708     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
709       /* If column is in the diagonal */
710       if (mat_j[col] >= cstart && mat_j[col] < cend) {
711         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
712         aa[rowstart_diag + dnz_row] = mat_a[col];
713         dnz_row++;
714       } else { /* off-diagonal entries */
715         bj[rowstart_offd + onz_row] = mat_j[col];
716         ba[rowstart_offd + onz_row] = mat_a[col];
717         onz_row++;
718       }
719     }
720     ailen[j] = dnz_row;
721     bilen[j] = onz_row;
722   }
723   PetscFunctionReturn(PETSC_SUCCESS);
724 }
725 
726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
727 {
728   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
729   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
730   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
731 
732   PetscFunctionBegin;
733   for (i = 0; i < m; i++) {
734     if (idxm[i] < 0) continue; /* negative row */
735     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
736     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
737     row = idxm[i] - rstart;
738     for (j = 0; j < n; j++) {
739       if (idxn[j] < 0) continue; /* negative column */
740       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
741       if (idxn[j] >= cstart && idxn[j] < cend) {
742         col = idxn[j] - cstart;
743         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
744       } else {
745         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
746 #if defined(PETSC_USE_CTABLE)
747         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
748         col--;
749 #else
750         col = aij->colmap[idxn[j]] - 1;
751 #endif
752         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
753         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
754       }
755     }
756   }
757   PetscFunctionReturn(PETSC_SUCCESS);
758 }
759 
760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
761 {
762   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
763   PetscInt    nstash, reallocs;
764 
765   PetscFunctionBegin;
766   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
767 
768   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
769   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
770   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
771   PetscFunctionReturn(PETSC_SUCCESS);
772 }
773 
774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
775 {
776   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
777   PetscMPIInt  n;
778   PetscInt     i, j, rstart, ncols, flg;
779   PetscInt    *row, *col;
780   PetscBool    other_disassembled;
781   PetscScalar *val;
782 
783   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
784 
785   PetscFunctionBegin;
786   if (!aij->donotstash && !mat->nooffprocentries) {
787     while (1) {
788       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
789       if (!flg) break;
790 
791       for (i = 0; i < n;) {
792         /* Now identify the consecutive vals belonging to the same row */
793         for (j = i, rstart = row[j]; j < n; j++) {
794           if (row[j] != rstart) break;
795         }
796         if (j < n) ncols = j - i;
797         else ncols = n - i;
798         /* Now assemble all these values with a single function call */
799         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
800         i = j;
801       }
802     }
803     PetscCall(MatStashScatterEnd_Private(&mat->stash));
804   }
805 #if defined(PETSC_HAVE_DEVICE)
806   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
807   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
808   if (mat->boundtocpu) {
809     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
810     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
811   }
812 #endif
813   PetscCall(MatAssemblyBegin(aij->A, mode));
814   PetscCall(MatAssemblyEnd(aij->A, mode));
815 
816   /* determine whether any process has disassembled; if so, we must
817      also disassemble ourselves, in order that we may reassemble. */
818   /*
819      if the nonzero structure of the submatrix B cannot change then we know that
820      no process disassembled, thus we can skip this step
821   */
822   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
823     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
824     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
825       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
826     }
827   }
828   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
829   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
830 #if defined(PETSC_HAVE_DEVICE)
831   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
832 #endif
833   PetscCall(MatAssemblyBegin(aij->B, mode));
834   PetscCall(MatAssemblyEnd(aij->B, mode));
835 
836   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
837 
838   aij->rowvalues = NULL;
839 
840   PetscCall(VecDestroy(&aij->diag));
841 
842   /* if no new nonzero locations are allowed in the matrix then only set the matrix nonzero state the first time through */
843   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
844     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
845     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
846   }
847 #if defined(PETSC_HAVE_DEVICE)
848   mat->offloadmask = PETSC_OFFLOAD_BOTH;
849 #endif
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
854 {
855   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
856 
857   PetscFunctionBegin;
858   PetscCall(MatZeroEntries(l->A));
859   PetscCall(MatZeroEntries(l->B));
860   PetscFunctionReturn(PETSC_SUCCESS);
861 }
862 
863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
864 {
865   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
866   PetscInt   *lrows;
867   PetscInt    r, len;
868   PetscBool   cong;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
873   PetscCall(MatHasCongruentLayouts(A, &cong));
874   /* fix right-hand side if needed */
875   if (x && b) {
876     const PetscScalar *xx;
877     PetscScalar       *bb;
878 
879     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
880     PetscCall(VecGetArrayRead(x, &xx));
881     PetscCall(VecGetArray(b, &bb));
882     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
883     PetscCall(VecRestoreArrayRead(x, &xx));
884     PetscCall(VecRestoreArray(b, &bb));
885   }
886 
887   if (diag != 0.0 && cong) {
888     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
889     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
890   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
891     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
892     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
893     PetscInt    nnwA, nnwB;
894     PetscBool   nnzA, nnzB;
895 
896     nnwA = aijA->nonew;
897     nnwB = aijB->nonew;
898     nnzA = aijA->keepnonzeropattern;
899     nnzB = aijB->keepnonzeropattern;
900     if (!nnzA) {
901       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
902       aijA->nonew = 0;
903     }
904     if (!nnzB) {
905       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
906       aijB->nonew = 0;
907     }
908     /* Must zero here before the next loop */
909     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
910     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
911     for (r = 0; r < len; ++r) {
912       const PetscInt row = lrows[r] + A->rmap->rstart;
913       if (row >= A->cmap->N) continue;
914       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
915     }
916     aijA->nonew = nnwA;
917     aijB->nonew = nnwB;
918   } else {
919     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
920     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
921   }
922   PetscCall(PetscFree(lrows));
923   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
924   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
928     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
929     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
930   }
931   PetscFunctionReturn(PETSC_SUCCESS);
932 }
933 
934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
935 {
936   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
937   PetscInt           n = A->rmap->n;
938   PetscInt           i, j, r, m, len = 0;
939   PetscInt          *lrows, *owners = A->rmap->range;
940   PetscMPIInt        p = 0;
941   PetscSFNode       *rrows;
942   PetscSF            sf;
943   const PetscScalar *xx;
944   PetscScalar       *bb, *mask, *aij_a;
945   Vec                xmask, lmask;
946   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
947   const PetscInt    *aj, *ii, *ridx;
948   PetscScalar       *aa;
949 
950   PetscFunctionBegin;
951   /* Create SF where leaves are input rows and roots are owned rows */
952   PetscCall(PetscMalloc1(n, &lrows));
953   for (r = 0; r < n; ++r) lrows[r] = -1;
954   PetscCall(PetscMalloc1(N, &rrows));
955   for (r = 0; r < N; ++r) {
956     const PetscInt idx = rows[r];
957     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
958     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
959       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
960     }
961     rrows[r].rank  = p;
962     rrows[r].index = rows[r] - owners[p];
963   }
964   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
965   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
966   /* Collect flags for rows to be zeroed */
967   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
968   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
969   PetscCall(PetscSFDestroy(&sf));
970   /* Compress and put in row numbers */
971   for (r = 0; r < n; ++r)
972     if (lrows[r] >= 0) lrows[len++] = r;
973   /* zero diagonal part of matrix */
974   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
975   /* handle off-diagonal part of matrix */
976   PetscCall(MatCreateVecs(A, &xmask, NULL));
977   PetscCall(VecDuplicate(l->lvec, &lmask));
978   PetscCall(VecGetArray(xmask, &bb));
979   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
980   PetscCall(VecRestoreArray(xmask, &bb));
981   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
982   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
983   PetscCall(VecDestroy(&xmask));
984   if (x && b) { /* this code is buggy when the row and column layout don't match */
985     PetscBool cong;
986 
987     PetscCall(MatHasCongruentLayouts(A, &cong));
988     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
989     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
990     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
991     PetscCall(VecGetArrayRead(l->lvec, &xx));
992     PetscCall(VecGetArray(b, &bb));
993   }
994   PetscCall(VecGetArray(lmask, &mask));
995   /* remove zeroed rows of off-diagonal matrix */
996   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
997   ii = aij->i;
998   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
999   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1000   if (aij->compressedrow.use) {
1001     m    = aij->compressedrow.nrows;
1002     ii   = aij->compressedrow.i;
1003     ridx = aij->compressedrow.rindex;
1004     for (i = 0; i < m; i++) {
1005       n  = ii[i + 1] - ii[i];
1006       aj = aij->j + ii[i];
1007       aa = aij_a + ii[i];
1008 
1009       for (j = 0; j < n; j++) {
1010         if (PetscAbsScalar(mask[*aj])) {
1011           if (b) bb[*ridx] -= *aa * xx[*aj];
1012           *aa = 0.0;
1013         }
1014         aa++;
1015         aj++;
1016       }
1017       ridx++;
1018     }
1019   } else { /* do not use compressed row format */
1020     m = l->B->rmap->n;
1021     for (i = 0; i < m; i++) {
1022       n  = ii[i + 1] - ii[i];
1023       aj = aij->j + ii[i];
1024       aa = aij_a + ii[i];
1025       for (j = 0; j < n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           if (b) bb[i] -= *aa * xx[*aj];
1028           *aa = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033     }
1034   }
1035   if (x && b) {
1036     PetscCall(VecRestoreArray(b, &bb));
1037     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1038   }
1039   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1040   PetscCall(VecRestoreArray(lmask, &mask));
1041   PetscCall(VecDestroy(&lmask));
1042   PetscCall(PetscFree(lrows));
1043 
1044   /* only change matrix nonzero state if pattern was allowed to be changed */
1045   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1046     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1047     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1048   }
1049   PetscFunctionReturn(PETSC_SUCCESS);
1050 }
1051 
1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1055   PetscInt    nt;
1056   VecScatter  Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   PetscCall(VecGetLocalSize(xx, &nt));
1060   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscUseTypeMethod(a->A, mult, xx, yy);
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1078 {
1079   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1080   VecScatter  Mvctx = a->Mvctx;
1081 
1082   PetscFunctionBegin;
1083   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1084   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1085   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1086   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1091 {
1092   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1093 
1094   PetscFunctionBegin;
1095   /* do nondiagonal part */
1096   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1097   /* do local part */
1098   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1099   /* add partial results together */
1100   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1101   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1106 {
1107   MPI_Comm    comm;
1108   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1109   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1110   IS          Me, Notme;
1111   PetscInt    M, N, first, last, *notme, i;
1112   PetscBool   lf;
1113   PetscMPIInt size;
1114 
1115   PetscFunctionBegin;
1116   /* Easy test: check that the diagonal blocks are transposes of each other */
1117   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1118   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1119   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1120   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1121   PetscCallMPI(MPI_Comm_size(comm, &size));
1122   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1123 
1124   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1125   PetscCall(MatGetSize(Amat, &M, &N));
1126   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1127   PetscCall(PetscMalloc1(N - last + first, &notme));
1128   for (i = 0; i < first; i++) notme[i] = i;
1129   for (i = last; i < M; i++) notme[i - last + first] = i;
1130   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1131   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1132   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1133   Aoff = Aoffs[0];
1134   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1135   Boff = Boffs[0];
1136   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1137   PetscCall(MatDestroyMatrices(1, &Aoffs));
1138   PetscCall(MatDestroyMatrices(1, &Boffs));
1139   PetscCall(ISDestroy(&Me));
1140   PetscCall(ISDestroy(&Notme));
1141   PetscCall(PetscFree(notme));
1142   PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144 
1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1146 {
1147   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1148 
1149   PetscFunctionBegin;
1150   /* do nondiagonal part */
1151   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1152   /* do local part */
1153   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1154   /* add partial results together */
1155   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1157   PetscFunctionReturn(PETSC_SUCCESS);
1158 }
1159 
1160 /*
1161   This only works correctly for square matrices where the subblock A->A is the
1162    diagonal block.
1163 */
1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1165 {
1166   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1167 
1168   PetscFunctionBegin;
1169   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1170   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1171   PetscCall(MatGetDiagonal(a->A, v));
1172   PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174 
1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1176 {
1177   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(MatScale(a->A, aa));
1181   PetscCall(MatScale(a->B, aa));
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184 
1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1186 {
1187   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1188   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1189   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1190   const PetscInt    *garray = aij->garray;
1191   const PetscScalar *aa, *ba;
1192   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1193   PetscInt64         nz, hnz;
1194   PetscInt          *rowlens;
1195   PetscInt          *colidxs;
1196   PetscScalar       *matvals;
1197   PetscMPIInt        rank;
1198 
1199   PetscFunctionBegin;
1200   PetscCall(PetscViewerSetUp(viewer));
1201 
1202   M  = mat->rmap->N;
1203   N  = mat->cmap->N;
1204   m  = mat->rmap->n;
1205   rs = mat->rmap->rstart;
1206   cs = mat->cmap->rstart;
1207   nz = A->nz + B->nz;
1208 
1209   /* write matrix header */
1210   header[0] = MAT_FILE_CLASSID;
1211   header[1] = M;
1212   header[2] = N;
1213   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1214   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1215   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1216   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1217 
1218   /* fill in and store row lengths  */
1219   PetscCall(PetscMalloc1(m, &rowlens));
1220   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1221   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1222   PetscCall(PetscFree(rowlens));
1223 
1224   /* fill in and store column indices */
1225   PetscCall(PetscMalloc1(nz, &colidxs));
1226   for (cnt = 0, i = 0; i < m; i++) {
1227     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1228       if (garray[B->j[jb]] > cs) break;
1229       colidxs[cnt++] = garray[B->j[jb]];
1230     }
1231     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1232     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1233   }
1234   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1235   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1236   PetscCall(PetscFree(colidxs));
1237 
1238   /* fill in and store nonzero values */
1239   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1240   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1241   PetscCall(PetscMalloc1(nz, &matvals));
1242   for (cnt = 0, i = 0; i < m; i++) {
1243     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       matvals[cnt++] = ba[jb];
1246     }
1247     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1248     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1249   }
1250   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1251   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1252   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1254   PetscCall(PetscFree(matvals));
1255 
1256   /* write block size option to the viewer's .info file */
1257   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1258   PetscFunctionReturn(PETSC_SUCCESS);
1259 }
1260 
1261 #include <petscdraw.h>
1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1263 {
1264   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1265   PetscMPIInt       rank = aij->rank, size = aij->size;
1266   PetscBool         isdraw, iascii, isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1272   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1273   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1274   if (iascii) {
1275     PetscCall(PetscViewerGetFormat(viewer, &format));
1276     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1277       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1278       PetscCall(PetscMalloc1(size, &nz));
1279       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1280       for (i = 0; i < size; i++) {
1281         nmax = PetscMax(nmax, nz[i]);
1282         nmin = PetscMin(nmin, nz[i]);
1283         navg += nz[i];
1284       }
1285       PetscCall(PetscFree(nz));
1286       navg = navg / size;
1287       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1288       PetscFunctionReturn(PETSC_SUCCESS);
1289     }
1290     PetscCall(PetscViewerGetFormat(viewer, &format));
1291     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1292       MatInfo   info;
1293       PetscInt *inodes = NULL;
1294 
1295       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1296       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1297       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1298       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1299       if (!inodes) {
1300         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1301                                                      info.memory));
1302       } else {
1303         PetscCall(
1304           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
1305       }
1306       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1307       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1308       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1309       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1310       PetscCall(PetscViewerFlush(viewer));
1311       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1312       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1313       PetscCall(VecScatterView(aij->Mvctx, viewer));
1314       PetscFunctionReturn(PETSC_SUCCESS);
1315     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1316       PetscInt inodecount, inodelimit, *inodes;
1317       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1318       if (inodes) {
1319         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1320       } else {
1321         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1322       }
1323       PetscFunctionReturn(PETSC_SUCCESS);
1324     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1325       PetscFunctionReturn(PETSC_SUCCESS);
1326     }
1327   } else if (isbinary) {
1328     if (size == 1) {
1329       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1330       PetscCall(MatView(aij->A, viewer));
1331     } else {
1332       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1333     }
1334     PetscFunctionReturn(PETSC_SUCCESS);
1335   } else if (iascii && size == 1) {
1336     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1337     PetscCall(MatView(aij->A, viewer));
1338     PetscFunctionReturn(PETSC_SUCCESS);
1339   } else if (isdraw) {
1340     PetscDraw draw;
1341     PetscBool isnull;
1342     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1343     PetscCall(PetscDrawIsNull(draw, &isnull));
1344     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1345   }
1346 
1347   { /* assemble the entire matrix onto first processor */
1348     Mat A = NULL, Av;
1349     IS  isrow, iscol;
1350 
1351     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1352     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1353     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1354     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1355     /*  The commented code uses MatCreateSubMatrices instead */
1356     /*
1357     Mat *AA, A = NULL, Av;
1358     IS  isrow,iscol;
1359 
1360     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1361     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1362     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1363     if (rank == 0) {
1364        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1365        A    = AA[0];
1366        Av   = AA[0];
1367     }
1368     PetscCall(MatDestroySubMatrices(1,&AA));
1369 */
1370     PetscCall(ISDestroy(&iscol));
1371     PetscCall(ISDestroy(&isrow));
1372     /*
1373        Every process has to take part in the call that draws the matrix, since the graphics waits are
1374        synchronized across all processes that share the PetscDraw object
1375     */
1376     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1377     if (rank == 0) {
1378       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1379       PetscCall(MatView_SeqAIJ(Av, sviewer));
1380     }
1381     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1382     PetscCall(MatDestroy(&A));
1383   }
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1388 {
1389   PetscBool iascii, isdraw, issocket, isbinary;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1395   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1396   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1401 {
1402   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1403   Vec         bb1 = NULL;
1404   PetscBool   hasop;
1405 
1406   PetscFunctionBegin;
1407   if (flag == SOR_APPLY_UPPER) {
1408     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1409     PetscFunctionReturn(PETSC_SUCCESS);
1410   }
1411 
1412   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1413 
1414   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419 
1420     while (its--) {
1421       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1423 
1424       /* update rhs: bb1 = bb - B*x */
1425       PetscCall(VecScale(mat->lvec, -1.0));
1426       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1427 
1428       /* local sweep */
1429       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1430     }
1431   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1432     if (flag & SOR_ZERO_INITIAL_GUESS) {
1433       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1434       its--;
1435     }
1436     while (its--) {
1437       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1439 
1440       /* update rhs: bb1 = bb - B*x */
1441       PetscCall(VecScale(mat->lvec, -1.0));
1442       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1443 
1444       /* local sweep */
1445       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1446     }
1447   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1448     if (flag & SOR_ZERO_INITIAL_GUESS) {
1449       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1450       its--;
1451     }
1452     while (its--) {
1453       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1455 
1456       /* update rhs: bb1 = bb - B*x */
1457       PetscCall(VecScale(mat->lvec, -1.0));
1458       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1459 
1460       /* local sweep */
1461       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1462     }
1463   } else if (flag & SOR_EISENSTAT) {
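    /* Eisenstat-style application: one local backward sweep produces xx, the right-hand side is then
       modified using the diagonal (scaled by (omega-2)/omega) plus the off-process coupling B*lvec,
       and one local forward sweep on the modified right-hand side yields a correction xx1 added to xx */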
1464     Vec xx1;
1465 
1466     PetscCall(VecDuplicate(bb, &xx1));
1467     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1468 
1469     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1470     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1471     if (!mat->diag) {
1472       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1473       PetscCall(MatGetDiagonal(matin, mat->diag));
1474     }
1475     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1476     if (hasop) {
1477       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1478     } else {
1479       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1480     }
1481     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1482 
1483     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1484 
1485     /* local sweep */
1486     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1487     PetscCall(VecAXPY(xx, 1.0, xx1));
1488     PetscCall(VecDestroy(&xx1));
1489   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1490 
1491   PetscCall(VecDestroy(&bb1));
1492 
1493   matin->factorerrortype = mat->A->factorerrortype;
1494   PetscFunctionReturn(PETSC_SUCCESS);
1495 }
1496 
1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1498 {
1499   Mat             aA, aB, Aperm;
1500   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1501   PetscScalar    *aa, *ba;
1502   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1503   PetscSF         rowsf, sf;
1504   IS              parcolp = NULL;
1505   PetscBool       done;
1506 
1507   PetscFunctionBegin;
1508   PetscCall(MatGetLocalSize(A, &m, &n));
1509   PetscCall(ISGetIndices(rowp, &rwant));
1510   PetscCall(ISGetIndices(colp, &cwant));
1511   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1512 
1513   /* Invert row permutation to find out where my rows should go */
1514   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1515   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1516   PetscCall(PetscSFSetFromOptions(rowsf));
1517   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1518   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1519   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1520 
1521   /* Invert column permutation to find out where my columns should go */
1522   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1523   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1524   PetscCall(PetscSFSetFromOptions(sf));
1525   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1526   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1527   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1528   PetscCall(PetscSFDestroy(&sf));
1529 
1530   PetscCall(ISRestoreIndices(rowp, &rwant));
1531   PetscCall(ISRestoreIndices(colp, &cwant));
1532   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1533 
1534   /* Find out where my gcols should go */
1535   PetscCall(MatGetSize(aB, NULL, &ng));
1536   PetscCall(PetscMalloc1(ng, &gcdest));
1537   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1538   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1539   PetscCall(PetscSFSetFromOptions(sf));
1540   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1541   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1542   PetscCall(PetscSFDestroy(&sf));
1543 
1544   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1545   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1546   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1547   for (i = 0; i < m; i++) {
1548     PetscInt    row = rdest[i];
1549     PetscMPIInt rowner;
1550     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1551     for (j = ai[i]; j < ai[i + 1]; j++) {
1552       PetscInt    col = cdest[aj[j]];
1553       PetscMPIInt cowner;
1554       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1555       if (rowner == cowner) dnnz[i]++;
1556       else onnz[i]++;
1557     }
1558     for (j = bi[i]; j < bi[i + 1]; j++) {
1559       PetscInt    col = gcdest[bj[j]];
1560       PetscMPIInt cowner;
1561       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1562       if (rowner == cowner) dnnz[i]++;
1563       else onnz[i]++;
1564     }
1565   }
1566   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1567   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1568   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1569   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1570   PetscCall(PetscSFDestroy(&rowsf));
1571 
1572   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1573   PetscCall(MatSeqAIJGetArray(aA, &aa));
1574   PetscCall(MatSeqAIJGetArray(aB, &ba));
1575   for (i = 0; i < m; i++) {
1576     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1577     PetscInt  j0, rowlen;
1578     rowlen = ai[i + 1] - ai[i];
1579     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1580       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1581       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1582     }
1583     rowlen = bi[i + 1] - bi[i];
1584     for (j0 = j = 0; j < rowlen; j0 = j) {
1585       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1586       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1587     }
1588   }
1589   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1590   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1591   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1592   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1593   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1594   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1595   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1596   PetscCall(PetscFree3(work, rdest, cdest));
1597   PetscCall(PetscFree(gcdest));
1598   if (parcolp) PetscCall(ISDestroy(&colp));
1599   *B = Aperm;
1600   PetscFunctionReturn(PETSC_SUCCESS);
1601 }
1602 
1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1604 {
1605   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1606 
1607   PetscFunctionBegin;
1608   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1609   if (ghosts) *ghosts = aij->garray;
1610   PetscFunctionReturn(PETSC_SUCCESS);
1611 }
1612 
1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1614 {
1615   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1616   Mat            A = mat->A, B = mat->B;
1617   PetscLogDouble isend[5], irecv[5];
1618 
1619   PetscFunctionBegin;
1620   info->block_size = 1.0;
1621   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1622 
1623   isend[0] = info->nz_used;
1624   isend[1] = info->nz_allocated;
1625   isend[2] = info->nz_unneeded;
1626   isend[3] = info->memory;
1627   isend[4] = info->mallocs;
1628 
1629   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1630 
1631   isend[0] += info->nz_used;
1632   isend[1] += info->nz_allocated;
1633   isend[2] += info->nz_unneeded;
1634   isend[3] += info->memory;
1635   isend[4] += info->mallocs;
1636   if (flag == MAT_LOCAL) {
1637     info->nz_used      = isend[0];
1638     info->nz_allocated = isend[1];
1639     info->nz_unneeded  = isend[2];
1640     info->memory       = isend[3];
1641     info->mallocs      = isend[4];
1642   } else if (flag == MAT_GLOBAL_MAX) {
1643     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1644 
1645     info->nz_used      = irecv[0];
1646     info->nz_allocated = irecv[1];
1647     info->nz_unneeded  = irecv[2];
1648     info->memory       = irecv[3];
1649     info->mallocs      = irecv[4];
1650   } else if (flag == MAT_GLOBAL_SUM) {
1651     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1652 
1653     info->nz_used      = irecv[0];
1654     info->nz_allocated = irecv[1];
1655     info->nz_unneeded  = irecv[2];
1656     info->memory       = irecv[3];
1657     info->mallocs      = irecv[4];
1658   }
1659   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1660   info->fill_ratio_needed = 0;
1661   info->factor_mallocs    = 0;
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1666 {
1667   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1668 
1669   PetscFunctionBegin;
1670   switch (op) {
1671   case MAT_NEW_NONZERO_LOCATIONS:
1672   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1673   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1674   case MAT_KEEP_NONZERO_PATTERN:
1675   case MAT_NEW_NONZERO_LOCATION_ERR:
1676   case MAT_USE_INODES:
1677   case MAT_IGNORE_ZERO_ENTRIES:
1678   case MAT_FORM_EXPLICIT_TRANSPOSE:
1679     MatCheckPreallocated(A, 1);
1680     PetscCall(MatSetOption(a->A, op, flg));
1681     PetscCall(MatSetOption(a->B, op, flg));
1682     break;
1683   case MAT_ROW_ORIENTED:
1684     MatCheckPreallocated(A, 1);
1685     a->roworiented = flg;
1686 
1687     PetscCall(MatSetOption(a->A, op, flg));
1688     PetscCall(MatSetOption(a->B, op, flg));
1689     break;
1690   case MAT_FORCE_DIAGONAL_ENTRIES:
1691   case MAT_SORTED_FULL:
1692     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1693     break;
1694   case MAT_IGNORE_OFF_PROC_ENTRIES:
1695     a->donotstash = flg;
1696     break;
1697   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1698   case MAT_SPD:
1699   case MAT_SYMMETRIC:
1700   case MAT_STRUCTURALLY_SYMMETRIC:
1701   case MAT_HERMITIAN:
1702   case MAT_SYMMETRY_ETERNAL:
1703   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1704   case MAT_SPD_ETERNAL:
1705     /* if the diagonal block is square it inherits some of the properties above */
1706     break;
1707   case MAT_SUBMAT_SINGLEIS:
1708     A->submat_singleis = flg;
1709     break;
1710   case MAT_STRUCTURE_ONLY:
1711     /* The option is handled directly by MatSetOption() */
1712     break;
1713   default:
1714     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1715   }
1716   PetscFunctionReturn(PETSC_SUCCESS);
1717 }
1718 
1719 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1720 {
1721   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1722   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1723   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1724   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1725   PetscInt    *cmap, *idx_p;
1726 
1727   PetscFunctionBegin;
1728   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1729   mat->getrowactive = PETSC_TRUE;
1730 
1731   if (!mat->rowvalues && (idx || v)) {
1732     /*
1733         allocate enough space to hold information from the longest row.
1734     */
1735     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1736     PetscInt    max = 1, tmp;
1737     for (i = 0; i < matin->rmap->n; i++) {
1738       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1739       if (max < tmp) max = tmp;
1740     }
1741     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1742   }
1743 
1744   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1745   lrow = row - rstart;
1746 
1747   pvA = &vworkA;
1748   pcA = &cworkA;
1749   pvB = &vworkB;
1750   pcB = &cworkB;
1751   if (!v) {
1752     pvA = NULL;
1753     pvB = NULL;
1754   }
1755   if (!idx) {
1756     pcA = NULL;
1757     if (!v) pcB = NULL;
1758   }
1759   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1760   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1761   nztot = nzA + nzB;
1762 
1763   cmap = mat->garray;
1764   if (v || idx) {
1765     if (nztot) {
1766       /* Sort by increasing column numbers, assuming A and B already sorted */
1767       PetscInt imark = -1;
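      /* imark = number of off-diagonal entries whose global column precedes the diagonal block;
         those are copied first, then the nzA diagonal-block entries, then the remaining off-diagonal entries */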
1768       if (v) {
1769         *v = v_p = mat->rowvalues;
1770         for (i = 0; i < nzB; i++) {
1771           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1772           else break;
1773         }
1774         imark = i;
1775         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1776         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1777       }
1778       if (idx) {
1779         *idx = idx_p = mat->rowindices;
1780         if (imark > -1) {
1781           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1782         } else {
1783           for (i = 0; i < nzB; i++) {
1784             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1785             else break;
1786           }
1787           imark = i;
1788         }
1789         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1790         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1791       }
1792     } else {
1793       if (idx) *idx = NULL;
1794       if (v) *v = NULL;
1795     }
1796   }
1797   *nz = nztot;
1798   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1799   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1800   PetscFunctionReturn(PETSC_SUCCESS);
1801 }
1802 
1803 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1804 {
1805   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1806 
1807   PetscFunctionBegin;
1808   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1809   aij->getrowactive = PETSC_FALSE;
1810   PetscFunctionReturn(PETSC_SUCCESS);
1811 }
1812 
1813 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1814 {
1815   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1816   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1817   PetscInt         i, j, cstart = mat->cmap->rstart;
1818   PetscReal        sum = 0.0;
1819   const MatScalar *v, *amata, *bmata;
1820 
1821   PetscFunctionBegin;
1822   if (aij->size == 1) {
1823     PetscCall(MatNorm(aij->A, type, norm));
1824   } else {
1825     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1826     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1827     if (type == NORM_FROBENIUS) {
1828       v = amata;
1829       for (i = 0; i < amat->nz; i++) {
1830         sum += PetscRealPart(PetscConj(*v) * (*v));
1831         v++;
1832       }
1833       v = bmata;
1834       for (i = 0; i < bmat->nz; i++) {
1835         sum += PetscRealPart(PetscConj(*v) * (*v));
1836         v++;
1837       }
1838       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1839       *norm = PetscSqrtReal(*norm);
1840       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1841     } else if (type == NORM_1) { /* max column norm */
1842       PetscReal *tmp;
1843       PetscInt  *jj, *garray = aij->garray;
1844       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1845       *norm = 0.0;
1846       v     = amata;
1847       jj    = amat->j;
1848       for (j = 0; j < amat->nz; j++) {
1849         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1850         v++;
1851       }
1852       v  = bmata;
1853       jj = bmat->j;
1854       for (j = 0; j < bmat->nz; j++) {
1855         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1856         v++;
1857       }
1858       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1859       for (j = 0; j < mat->cmap->N; j++) {
1860         if (tmp[j] > *norm) *norm = tmp[j];
1861       }
1862       PetscCall(PetscFree(tmp));
1863       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1864     } else if (type == NORM_INFINITY) { /* max row norm */
1865       PetscReal ntemp = 0.0;
1866       for (j = 0; j < aij->A->rmap->n; j++) {
1867         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1868         sum = 0.0;
1869         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1870           sum += PetscAbsScalar(*v);
1871           v++;
1872         }
1873         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1874         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1875           sum += PetscAbsScalar(*v);
1876           v++;
1877         }
1878         if (sum > ntemp) ntemp = sum;
1879       }
1880       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1881       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1882     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1883     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1884     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1885   }
1886   PetscFunctionReturn(PETSC_SUCCESS);
1887 }
1888 
1889 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1890 {
1891   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1892   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1893   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1894   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1895   Mat              B, A_diag, *B_diag;
1896   const MatScalar *pbv, *bv;
1897 
1898   PetscFunctionBegin;
1899   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1900   ma = A->rmap->n;
1901   na = A->cmap->n;
1902   mb = a->B->rmap->n;
1903   nb = a->B->cmap->n;
1904   ai = Aloc->i;
1905   aj = Aloc->j;
1906   bi = Bloc->i;
1907   bj = Bloc->j;
1908   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1909     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1910     PetscSFNode         *oloc;
1911     PETSC_UNUSED PetscSF sf;
1912 
1913     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1914     /* compute d_nnz for preallocation */
1915     PetscCall(PetscArrayzero(d_nnz, na));
1916     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1917     /* compute local off-diagonal contributions */
1918     PetscCall(PetscArrayzero(g_nnz, nb));
1919     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1920     /* map those to global */
1921     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1922     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1923     PetscCall(PetscSFSetFromOptions(sf));
1924     PetscCall(PetscArrayzero(o_nnz, na));
1925     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1926     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1927     PetscCall(PetscSFDestroy(&sf));
1928 
1929     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1930     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1931     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1932     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1933     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1934     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1935   } else {
1936     B = *matout;
1937     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1938   }
1939 
1940   b           = (Mat_MPIAIJ *)B->data;
1941   A_diag      = a->A;
1942   B_diag      = &b->A;
1943   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1944   A_diag_ncol = A_diag->cmap->N;
1945   B_diag_ilen = sub_B_diag->ilen;
1946   B_diag_i    = sub_B_diag->i;
1947 
1948   /* Set ilen for diagonal of B */
1949   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1950 
1951   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1952   very quickly (i.e., without using MatSetValues()), because all writes are local. */
1953   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1954   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1955 
1956   /* copy over the B part */
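  /* Each local row of a->B becomes a column of the transpose: MatSetValues() is called with the row's
     global column numbers (through garray) as the target rows and the current global row as the single column */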
1957   PetscCall(PetscMalloc1(bi[mb], &cols));
1958   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1959   pbv = bv;
1960   row = A->rmap->rstart;
1961   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1962   cols_tmp = cols;
1963   for (i = 0; i < mb; i++) {
1964     ncol = bi[i + 1] - bi[i];
1965     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1966     row++;
1967     if (pbv) pbv += ncol;
1968     if (cols_tmp) cols_tmp += ncol;
1969   }
1970   PetscCall(PetscFree(cols));
1971   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1972 
1973   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1974   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1975   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1976     *matout = B;
1977   } else {
1978     PetscCall(MatHeaderMerge(A, &B));
1979   }
1980   PetscFunctionReturn(PETSC_SUCCESS);
1981 }
1982 
1983 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1984 {
1985   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1986   Mat         a = aij->A, b = aij->B;
1987   PetscInt    s1, s2, s3;
1988 
1989   PetscFunctionBegin;
1990   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1991   if (rr) {
1992     PetscCall(VecGetLocalSize(rr, &s1));
1993     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1994     /* Overlap communication with computation. */
1995     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1996   }
1997   if (ll) {
1998     PetscCall(VecGetLocalSize(ll, &s1));
1999     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2000     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
2001   }
2002   /* scale the diagonal block */
2003   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2004 
2005   if (rr) {
2006     /* Do a scatter end and then right scale the off-diagonal block */
2007     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2008     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2009   }
2010   PetscFunctionReturn(PETSC_SUCCESS);
2011 }
2012 
2013 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2014 {
2015   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2016 
2017   PetscFunctionBegin;
2018   PetscCall(MatSetUnfactored(a->A));
2019   PetscFunctionReturn(PETSC_SUCCESS);
2020 }
2021 
2022 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2023 {
2024   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2025   Mat         a, b, c, d;
2026   PetscBool   flg;
2027 
2028   PetscFunctionBegin;
2029   a = matA->A;
2030   b = matA->B;
2031   c = matB->A;
2032   d = matB->B;
2033 
2034   PetscCall(MatEqual(a, c, &flg));
2035   if (flg) PetscCall(MatEqual(b, d, &flg));
2036   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2037   PetscFunctionReturn(PETSC_SUCCESS);
2038 }
2039 
2040 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2041 {
2042   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2043   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2044 
2045   PetscFunctionBegin;
2046   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2047   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2048     /* because of the column compression in the off-processor part of the matrix a->B,
2049        the number of columns in a->B and b->B may be different, hence we cannot call
2050        the MatCopy() directly on the two parts. If need be, we can provide a more
2051        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2052        then copying the submatrices */
2053     PetscCall(MatCopy_Basic(A, B, str));
2054   } else {
2055     PetscCall(MatCopy(a->A, b->A, str));
2056     PetscCall(MatCopy(a->B, b->B, str));
2057   }
2058   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2059   PetscFunctionReturn(PETSC_SUCCESS);
2060 }
2061 
2062 /*
2063    Computes the number of nonzeros per row needed for preallocation when X and Y
2064    have different nonzero structure.
2065 */
2066 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2067 {
2068   PetscInt i, j, k, nzx, nzy;
2069 
2070   PetscFunctionBegin;
2071   /* Set the number of nonzeros in the new matrix */
2072   for (i = 0; i < m; i++) {
2073     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2074     nzx    = xi[i + 1] - xi[i];
2075     nzy    = yi[i + 1] - yi[i];
2076     nnz[i] = 0;
2077     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2078       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2079       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2080       nnz[i]++;
2081     }
2082     for (; k < nzy; k++) nnz[i]++;
2083   }
2084   PetscFunctionReturn(PETSC_SUCCESS);
2085 }
2086 
2087 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2088 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2089 {
2090   PetscInt    m = Y->rmap->N;
2091   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2092   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2093 
2094   PetscFunctionBegin;
2095   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2096   PetscFunctionReturn(PETSC_SUCCESS);
2097 }
2098 
2099 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2100 {
2101   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2102 
2103   PetscFunctionBegin;
2104   if (str == SAME_NONZERO_PATTERN) {
2105     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2106     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2107   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2108     PetscCall(MatAXPY_Basic(Y, a, X, str));
2109   } else {
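    /* Different nonzero patterns: compute the per-row preallocation of the merged pattern, assemble
       Y + a*X into a fresh matrix B with that preallocation, then swap B's data into Y via MatHeaderMerge() */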
2110     Mat       B;
2111     PetscInt *nnz_d, *nnz_o;
2112 
2113     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2114     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2115     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2116     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2117     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2118     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2119     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2120     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2121     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2122     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2123     PetscCall(MatHeaderMerge(Y, &B));
2124     PetscCall(PetscFree(nnz_d));
2125     PetscCall(PetscFree(nnz_o));
2126   }
2127   PetscFunctionReturn(PETSC_SUCCESS);
2128 }
2129 
2130 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2131 
2132 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2133 {
2134   PetscFunctionBegin;
2135   if (PetscDefined(USE_COMPLEX)) {
2136     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2137 
2138     PetscCall(MatConjugate_SeqAIJ(aij->A));
2139     PetscCall(MatConjugate_SeqAIJ(aij->B));
2140   }
2141   PetscFunctionReturn(PETSC_SUCCESS);
2142 }
2143 
2144 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2145 {
2146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2147 
2148   PetscFunctionBegin;
2149   PetscCall(MatRealPart(a->A));
2150   PetscCall(MatRealPart(a->B));
2151   PetscFunctionReturn(PETSC_SUCCESS);
2152 }
2153 
2154 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2155 {
2156   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2157 
2158   PetscFunctionBegin;
2159   PetscCall(MatImaginaryPart(a->A));
2160   PetscCall(MatImaginaryPart(a->B));
2161   PetscFunctionReturn(PETSC_SUCCESS);
2162 }
2163 
2164 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2165 {
2166   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2167   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2168   PetscScalar       *va, *vv;
2169   Vec                vB, vA;
2170   const PetscScalar *vb;
2171 
2172   PetscFunctionBegin;
2173   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2174   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2175 
2176   PetscCall(VecGetArrayWrite(vA, &va));
2177   if (idx) {
2178     for (i = 0; i < m; i++) {
2179       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2180     }
2181   }
2182 
2183   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2184   PetscCall(PetscMalloc1(m, &idxb));
2185   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2186 
2187   PetscCall(VecGetArrayWrite(v, &vv));
2188   PetscCall(VecGetArrayRead(vB, &vb));
2189   for (i = 0; i < m; i++) {
2190     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2191       vv[i] = vb[i];
2192       if (idx) idx[i] = a->garray[idxb[i]];
2193     } else {
2194       vv[i] = va[i];
2195       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2196     }
2197   }
2198   PetscCall(VecRestoreArrayWrite(v, &vv));
2199   PetscCall(VecRestoreArrayWrite(vA, &va));
2200   PetscCall(VecRestoreArrayRead(vB, &vb));
2201   PetscCall(PetscFree(idxb));
2202   PetscCall(VecDestroy(&vA));
2203   PetscCall(VecDestroy(&vB));
2204   PetscFunctionReturn(PETSC_SUCCESS);
2205 }
2206 
2207 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2208 {
2209   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2210   Vec         vB, vA;
2211 
2212   PetscFunctionBegin;
2213   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2214   PetscCall(MatGetRowSumAbs(a->A, vA));
2215   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2216   PetscCall(MatGetRowSumAbs(a->B, vB));
2217   PetscCall(VecAXPY(vA, 1.0, vB));
2218   PetscCall(VecDestroy(&vB));
2219   PetscCall(VecCopy(vA, v));
2220   PetscCall(VecDestroy(&vA));
2221   PetscFunctionReturn(PETSC_SUCCESS);
2222 }
2223 
2224 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2225 {
2226   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2227   PetscInt           m = A->rmap->n, n = A->cmap->n;
2228   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2229   PetscInt          *cmap = mat->garray;
2230   PetscInt          *diagIdx, *offdiagIdx;
2231   Vec                diagV, offdiagV;
2232   PetscScalar       *a, *diagA, *offdiagA;
2233   const PetscScalar *ba, *bav;
2234   PetscInt           r, j, col, ncols, *bi, *bj;
2235   Mat                B = mat->B;
2236   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2237 
2238   PetscFunctionBegin;
2239   /* When one process holds the entire matrix and the other processes have no entries */
2240   if (A->cmap->N == n) {
2241     PetscCall(VecGetArrayWrite(v, &diagA));
2242     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2243     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2244     PetscCall(VecDestroy(&diagV));
2245     PetscCall(VecRestoreArrayWrite(v, &diagA));
2246     PetscFunctionReturn(PETSC_SUCCESS);
2247   } else if (n == 0) {
2248     if (m) {
2249       PetscCall(VecGetArrayWrite(v, &a));
2250       for (r = 0; r < m; r++) {
2251         a[r] = 0.0;
2252         if (idx) idx[r] = -1;
2253       }
2254       PetscCall(VecRestoreArrayWrite(v, &a));
2255     }
2256     PetscFunctionReturn(PETSC_SUCCESS);
2257   }
2258 
2259   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2260   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2261   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2262   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2263 
2264   /* Get offdiagIdx[] for implicit 0.0 */
2265   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2266   ba = bav;
2267   bi = b->i;
2268   bj = b->j;
2269   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2270   for (r = 0; r < m; r++) {
2271     ncols = bi[r + 1] - bi[r];
2272     if (ncols == A->cmap->N - n) { /* Brow is dense */
2273       offdiagA[r]   = *ba;
2274       offdiagIdx[r] = cmap[0];
2275     } else { /* Brow is sparse, so we already KNOW the smallest absolute value in the row is an implicit 0.0 */
2276       offdiagA[r] = 0.0;
2277 
2278       /* Find first hole in the cmap */
2279       for (j = 0; j < ncols; j++) {
2280         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2281         if (col > j && j < cstart) {
2282           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2283           break;
2284         } else if (col > j + n && j >= cstart) {
2285           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2286           break;
2287         }
2288       }
2289       if (j == ncols && ncols < A->cmap->N - n) {
2290         /* a hole is outside compressed Bcols */
2291         if (ncols == 0) {
2292           if (cstart) {
2293             offdiagIdx[r] = 0;
2294           } else offdiagIdx[r] = cend;
2295         } else { /* ncols > 0 */
2296           offdiagIdx[r] = cmap[ncols - 1] + 1;
2297           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2298         }
2299       }
2300     }
2301 
2302     for (j = 0; j < ncols; j++) {
2303       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2304         offdiagA[r]   = *ba;
2305         offdiagIdx[r] = cmap[*bj];
2306       }
2307       ba++;
2308       bj++;
2309     }
2310   }
2311 
2312   PetscCall(VecGetArrayWrite(v, &a));
2313   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2314   for (r = 0; r < m; ++r) {
2315     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2316       a[r] = diagA[r];
2317       if (idx) idx[r] = cstart + diagIdx[r];
2318     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2319       a[r] = diagA[r];
2320       if (idx) {
2321         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2322           idx[r] = cstart + diagIdx[r];
2323         } else idx[r] = offdiagIdx[r];
2324       }
2325     } else {
2326       a[r] = offdiagA[r];
2327       if (idx) idx[r] = offdiagIdx[r];
2328     }
2329   }
2330   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2331   PetscCall(VecRestoreArrayWrite(v, &a));
2332   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2333   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2334   PetscCall(VecDestroy(&diagV));
2335   PetscCall(VecDestroy(&offdiagV));
2336   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2337   PetscFunctionReturn(PETSC_SUCCESS);
2338 }
2339 
2340 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2341 {
2342   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2343   PetscInt           m = A->rmap->n, n = A->cmap->n;
2344   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2345   PetscInt          *cmap = mat->garray;
2346   PetscInt          *diagIdx, *offdiagIdx;
2347   Vec                diagV, offdiagV;
2348   PetscScalar       *a, *diagA, *offdiagA;
2349   const PetscScalar *ba, *bav;
2350   PetscInt           r, j, col, ncols, *bi, *bj;
2351   Mat                B = mat->B;
2352   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2353 
2354   PetscFunctionBegin;
2355   /* When one process holds the entire matrix and the other processes have no entries */
2356   if (A->cmap->N == n) {
2357     PetscCall(VecGetArrayWrite(v, &diagA));
2358     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2359     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2360     PetscCall(VecDestroy(&diagV));
2361     PetscCall(VecRestoreArrayWrite(v, &diagA));
2362     PetscFunctionReturn(PETSC_SUCCESS);
2363   } else if (n == 0) {
2364     if (m) {
2365       PetscCall(VecGetArrayWrite(v, &a));
2366       for (r = 0; r < m; r++) {
2367         a[r] = PETSC_MAX_REAL;
2368         if (idx) idx[r] = -1;
2369       }
2370       PetscCall(VecRestoreArrayWrite(v, &a));
2371     }
2372     PetscFunctionReturn(PETSC_SUCCESS);
2373   }
2374 
2375   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2376   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2377   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2378   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2379 
2380   /* Get offdiagIdx[] for implicit 0.0 */
2381   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2382   ba = bav;
2383   bi = b->i;
2384   bj = b->j;
2385   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2386   for (r = 0; r < m; r++) {
2387     ncols = bi[r + 1] - bi[r];
2388     if (ncols == A->cmap->N - n) { /* Brow is dense */
2389       offdiagA[r]   = *ba;
2390       offdiagIdx[r] = cmap[0];
2391     } else { /* Brow is sparse, so we already KNOW the minimum is 0.0 or lower */
2392       offdiagA[r] = 0.0;
2393 
2394       /* Find first hole in the cmap */
2395       for (j = 0; j < ncols; j++) {
2396         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2397         if (col > j && j < cstart) {
2398           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2399           break;
2400         } else if (col > j + n && j >= cstart) {
2401           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2402           break;
2403         }
2404       }
2405       if (j == ncols && ncols < A->cmap->N - n) {
2406         /* a hole is outside compressed Bcols */
2407         if (ncols == 0) {
2408           if (cstart) {
2409             offdiagIdx[r] = 0;
2410           } else offdiagIdx[r] = cend;
2411         } else { /* ncols > 0 */
2412           offdiagIdx[r] = cmap[ncols - 1] + 1;
2413           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2414         }
2415       }
2416     }
2417 
2418     for (j = 0; j < ncols; j++) {
2419       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2420         offdiagA[r]   = *ba;
2421         offdiagIdx[r] = cmap[*bj];
2422       }
2423       ba++;
2424       bj++;
2425     }
2426   }
2427 
2428   PetscCall(VecGetArrayWrite(v, &a));
2429   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2430   for (r = 0; r < m; ++r) {
2431     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2432       a[r] = diagA[r];
2433       if (idx) idx[r] = cstart + diagIdx[r];
2434     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2435       a[r] = diagA[r];
2436       if (idx) {
2437         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2438           idx[r] = cstart + diagIdx[r];
2439         } else idx[r] = offdiagIdx[r];
2440       }
2441     } else {
2442       a[r] = offdiagA[r];
2443       if (idx) idx[r] = offdiagIdx[r];
2444     }
2445   }
2446   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2447   PetscCall(VecRestoreArrayWrite(v, &a));
2448   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2449   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2450   PetscCall(VecDestroy(&diagV));
2451   PetscCall(VecDestroy(&offdiagV));
2452   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2453   PetscFunctionReturn(PETSC_SUCCESS);
2454 }
2455 
2456 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2457 {
2458   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2459   PetscInt           m = A->rmap->n, n = A->cmap->n;
2460   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2461   PetscInt          *cmap = mat->garray;
2462   PetscInt          *diagIdx, *offdiagIdx;
2463   Vec                diagV, offdiagV;
2464   PetscScalar       *a, *diagA, *offdiagA;
2465   const PetscScalar *ba, *bav;
2466   PetscInt           r, j, col, ncols, *bi, *bj;
2467   Mat                B = mat->B;
2468   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2469 
2470   PetscFunctionBegin;
2471   /* When one process holds the entire matrix and the other processes have no entries */
2472   if (A->cmap->N == n) {
2473     PetscCall(VecGetArrayWrite(v, &diagA));
2474     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2475     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2476     PetscCall(VecDestroy(&diagV));
2477     PetscCall(VecRestoreArrayWrite(v, &diagA));
2478     PetscFunctionReturn(PETSC_SUCCESS);
2479   } else if (n == 0) {
2480     if (m) {
2481       PetscCall(VecGetArrayWrite(v, &a));
2482       for (r = 0; r < m; r++) {
2483         a[r] = PETSC_MIN_REAL;
2484         if (idx) idx[r] = -1;
2485       }
2486       PetscCall(VecRestoreArrayWrite(v, &a));
2487     }
2488     PetscFunctionReturn(PETSC_SUCCESS);
2489   }
2490 
2491   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2492   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2493   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2494   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2495 
2496   /* Get offdiagIdx[] for implicit 0.0 */
2497   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2498   ba = bav;
2499   bi = b->i;
2500   bj = b->j;
2501   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2502   for (r = 0; r < m; r++) {
2503     ncols = bi[r + 1] - bi[r];
2504     if (ncols == A->cmap->N - n) { /* Brow is dense */
2505       offdiagA[r]   = *ba;
2506       offdiagIdx[r] = cmap[0];
2507     } else { /* Brow is sparse, so we already KNOW the maximum is 0.0 or higher */
2508       offdiagA[r] = 0.0;
2509 
2510       /* Find first hole in the cmap */
2511       for (j = 0; j < ncols; j++) {
2512         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2513         if (col > j && j < cstart) {
2514           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2515           break;
2516         } else if (col > j + n && j >= cstart) {
2517           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2518           break;
2519         }
2520       }
2521       if (j == ncols && ncols < A->cmap->N - n) {
2522         /* a hole is outside compressed Bcols */
2523         if (ncols == 0) {
2524           if (cstart) {
2525             offdiagIdx[r] = 0;
2526           } else offdiagIdx[r] = cend;
2527         } else { /* ncols > 0 */
2528           offdiagIdx[r] = cmap[ncols - 1] + 1;
2529           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2530         }
2531       }
2532     }
2533 
2534     for (j = 0; j < ncols; j++) {
2535       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2536         offdiagA[r]   = *ba;
2537         offdiagIdx[r] = cmap[*bj];
2538       }
2539       ba++;
2540       bj++;
2541     }
2542   }
2543 
2544   PetscCall(VecGetArrayWrite(v, &a));
2545   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2546   for (r = 0; r < m; ++r) {
2547     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2548       a[r] = diagA[r];
2549       if (idx) idx[r] = cstart + diagIdx[r];
2550     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2551       a[r] = diagA[r];
2552       if (idx) {
2553         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2554           idx[r] = cstart + diagIdx[r];
2555         } else idx[r] = offdiagIdx[r];
2556       }
2557     } else {
2558       a[r] = offdiagA[r];
2559       if (idx) idx[r] = offdiagIdx[r];
2560     }
2561   }
2562   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2563   PetscCall(VecRestoreArrayWrite(v, &a));
2564   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2565   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2566   PetscCall(VecDestroy(&diagV));
2567   PetscCall(VecDestroy(&offdiagV));
2568   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2569   PetscFunctionReturn(PETSC_SUCCESS);
2570 }
2571 
2572 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2573 {
2574   Mat *dummy;
2575 
2576   PetscFunctionBegin;
2577   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2578   *newmat = *dummy;
2579   PetscCall(PetscFree(dummy));
2580   PetscFunctionReturn(PETSC_SUCCESS);
2581 }
2582 
2583 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2584 {
2585   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2586 
2587   PetscFunctionBegin;
2588   PetscCall(MatInvertBlockDiagonal(a->A, values));
2589   A->factorerrortype = a->A->factorerrortype;
2590   PetscFunctionReturn(PETSC_SUCCESS);
2591 }
2592 
2593 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2594 {
2595   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2596 
2597   PetscFunctionBegin;
2598   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2599   PetscCall(MatSetRandom(aij->A, rctx));
2600   if (x->assembled) {
2601     PetscCall(MatSetRandom(aij->B, rctx));
2602   } else {
2603     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2604   }
2605   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2606   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2607   PetscFunctionReturn(PETSC_SUCCESS);
2608 }
2609 
2610 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2611 {
2612   PetscFunctionBegin;
2613   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2614   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2615   PetscFunctionReturn(PETSC_SUCCESS);
2616 }
2617 
2618 /*@
2619   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2620 
2621   Not Collective
2622 
2623   Input Parameter:
2624 . A - the matrix
2625 
2626   Output Parameter:
2627 . nz - the number of nonzeros
2628 
2629   Level: advanced
2630 
2631 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2632 @*/
2633 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2634 {
2635   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2636   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2637   PetscBool   isaij;
2638 
2639   PetscFunctionBegin;
2640   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2641   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2642   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2643   PetscFunctionReturn(PETSC_SUCCESS);
2644 }
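/*
   A minimal usage sketch (not part of this file; `A` here is an assumed, already assembled
   MATMPIAIJ matrix):

     PetscCount nz;
     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); // nz now holds the nonzeros stored on this rank
*/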
2645 
2646 /*@
2647   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2648 
2649   Collective
2650 
2651   Input Parameters:
2652 + A  - the matrix
2653 - sc - `PETSC_TRUE` indicates use of the scalable algorithm (the default is to not use it)
2654 
2655   Level: advanced
2656 
2657 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2658 @*/
2659 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2660 {
2661   PetscFunctionBegin;
2662   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2663   PetscFunctionReturn(PETSC_SUCCESS);
2664 }
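/*
   A minimal usage sketch (`A` is an assumed MATMPIAIJ matrix):

     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));

   The same behavior can be requested at runtime with -mat_increase_overlap_scalable, which is
   handled by MatSetFromOptions_MPIAIJ() below.
*/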
2665 
2666 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2667 {
2668   PetscBool sc = PETSC_FALSE, flg;
2669 
2670   PetscFunctionBegin;
2671   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2672   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2673   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2674   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2675   PetscOptionsHeadEnd();
2676   PetscFunctionReturn(PETSC_SUCCESS);
2677 }
2678 
2679 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2680 {
2681   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2682   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2683 
2684   PetscFunctionBegin;
2685   if (!Y->preallocated) {
2686     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2687   } else if (!aij->nz) { /* It does not matter if the diagonal entries of Y only partially lie in maij->A. We just need an estimated preallocation. */
2688     PetscInt nonew = aij->nonew;
2689     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2690     aij->nonew = nonew;
2691   }
2692   PetscCall(MatShift_Basic(Y, a));
2693   PetscFunctionReturn(PETSC_SUCCESS);
2694 }
2695 
2696 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2697 {
2698   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2699 
2700   PetscFunctionBegin;
2701   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2702   PetscCall(MatMissingDiagonal(a->A, missing, d));
2703   if (d) {
2704     PetscInt rstart;
2705     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2706     *d += rstart;
2707   }
2708   PetscFunctionReturn(PETSC_SUCCESS);
2709 }
2710 
2711 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2712 {
2713   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2714 
2715   PetscFunctionBegin;
2716   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2717   PetscFunctionReturn(PETSC_SUCCESS);
2718 }
2719 
2720 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2721 {
2722   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2723 
2724   PetscFunctionBegin;
2725   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2726   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2727   PetscFunctionReturn(PETSC_SUCCESS);
2728 }
2729 
2730 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2731                                        MatGetRow_MPIAIJ,
2732                                        MatRestoreRow_MPIAIJ,
2733                                        MatMult_MPIAIJ,
2734                                        /* 4*/ MatMultAdd_MPIAIJ,
2735                                        MatMultTranspose_MPIAIJ,
2736                                        MatMultTransposeAdd_MPIAIJ,
2737                                        NULL,
2738                                        NULL,
2739                                        NULL,
2740                                        /*10*/ NULL,
2741                                        NULL,
2742                                        NULL,
2743                                        MatSOR_MPIAIJ,
2744                                        MatTranspose_MPIAIJ,
2745                                        /*15*/ MatGetInfo_MPIAIJ,
2746                                        MatEqual_MPIAIJ,
2747                                        MatGetDiagonal_MPIAIJ,
2748                                        MatDiagonalScale_MPIAIJ,
2749                                        MatNorm_MPIAIJ,
2750                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2751                                        MatAssemblyEnd_MPIAIJ,
2752                                        MatSetOption_MPIAIJ,
2753                                        MatZeroEntries_MPIAIJ,
2754                                        /*24*/ MatZeroRows_MPIAIJ,
2755                                        NULL,
2756                                        NULL,
2757                                        NULL,
2758                                        NULL,
2759                                        /*29*/ MatSetUp_MPI_Hash,
2760                                        NULL,
2761                                        NULL,
2762                                        MatGetDiagonalBlock_MPIAIJ,
2763                                        NULL,
2764                                        /*34*/ MatDuplicate_MPIAIJ,
2765                                        NULL,
2766                                        NULL,
2767                                        NULL,
2768                                        NULL,
2769                                        /*39*/ MatAXPY_MPIAIJ,
2770                                        MatCreateSubMatrices_MPIAIJ,
2771                                        MatIncreaseOverlap_MPIAIJ,
2772                                        MatGetValues_MPIAIJ,
2773                                        MatCopy_MPIAIJ,
2774                                        /*44*/ MatGetRowMax_MPIAIJ,
2775                                        MatScale_MPIAIJ,
2776                                        MatShift_MPIAIJ,
2777                                        MatDiagonalSet_MPIAIJ,
2778                                        MatZeroRowsColumns_MPIAIJ,
2779                                        /*49*/ MatSetRandom_MPIAIJ,
2780                                        MatGetRowIJ_MPIAIJ,
2781                                        MatRestoreRowIJ_MPIAIJ,
2782                                        NULL,
2783                                        NULL,
2784                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2785                                        NULL,
2786                                        MatSetUnfactored_MPIAIJ,
2787                                        MatPermute_MPIAIJ,
2788                                        NULL,
2789                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2790                                        MatDestroy_MPIAIJ,
2791                                        MatView_MPIAIJ,
2792                                        NULL,
2793                                        NULL,
2794                                        /*64*/ NULL,
2795                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2796                                        NULL,
2797                                        NULL,
2798                                        NULL,
2799                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2800                                        MatGetRowMinAbs_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        /*75*/ MatFDColoringApply_AIJ,
2806                                        MatSetFromOptions_MPIAIJ,
2807                                        NULL,
2808                                        NULL,
2809                                        MatFindZeroDiagonals_MPIAIJ,
2810                                        /*80*/ NULL,
2811                                        NULL,
2812                                        NULL,
2813                                        /*83*/ MatLoad_MPIAIJ,
2814                                        NULL,
2815                                        NULL,
2816                                        NULL,
2817                                        NULL,
2818                                        NULL,
2819                                        /*89*/ NULL,
2820                                        NULL,
2821                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2822                                        NULL,
2823                                        NULL,
2824                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2825                                        NULL,
2826                                        NULL,
2827                                        NULL,
2828                                        MatBindToCPU_MPIAIJ,
2829                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2830                                        NULL,
2831                                        NULL,
2832                                        MatConjugate_MPIAIJ,
2833                                        NULL,
2834                                        /*104*/ MatSetValuesRow_MPIAIJ,
2835                                        MatRealPart_MPIAIJ,
2836                                        MatImaginaryPart_MPIAIJ,
2837                                        NULL,
2838                                        NULL,
2839                                        /*109*/ NULL,
2840                                        NULL,
2841                                        MatGetRowMin_MPIAIJ,
2842                                        NULL,
2843                                        MatMissingDiagonal_MPIAIJ,
2844                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2845                                        NULL,
2846                                        MatGetGhosts_MPIAIJ,
2847                                        NULL,
2848                                        NULL,
2849                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2850                                        NULL,
2851                                        NULL,
2852                                        NULL,
2853                                        MatGetMultiProcBlock_MPIAIJ,
2854                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2855                                        MatGetColumnReductions_MPIAIJ,
2856                                        MatInvertBlockDiagonal_MPIAIJ,
2857                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2858                                        MatCreateSubMatricesMPI_MPIAIJ,
2859                                        /*129*/ NULL,
2860                                        NULL,
2861                                        NULL,
2862                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2863                                        NULL,
2864                                        /*134*/ NULL,
2865                                        NULL,
2866                                        NULL,
2867                                        NULL,
2868                                        NULL,
2869                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2870                                        NULL,
2871                                        NULL,
2872                                        MatFDColoringSetUp_MPIXAIJ,
2873                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2874                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2875                                        /*145*/ NULL,
2876                                        NULL,
2877                                        NULL,
2878                                        MatCreateGraph_Simple_AIJ,
2879                                        NULL,
2880                                        /*150*/ NULL,
2881                                        MatEliminateZeros_MPIAIJ,
2882                                        MatGetRowSumAbs_MPIAIJ,
2883                                        NULL,
2884                                        NULL,
2885                                        /*155*/ NULL,
2886                                        MatCopyHashToXAIJ_MPI_Hash};
2887 
2888 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2889 {
2890   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2891 
2892   PetscFunctionBegin;
2893   PetscCall(MatStoreValues(aij->A));
2894   PetscCall(MatStoreValues(aij->B));
2895   PetscFunctionReturn(PETSC_SUCCESS);
2896 }
2897 
2898 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2899 {
2900   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2901 
2902   PetscFunctionBegin;
2903   PetscCall(MatRetrieveValues(aij->A));
2904   PetscCall(MatRetrieveValues(aij->B));
2905   PetscFunctionReturn(PETSC_SUCCESS);
2906 }
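/*
   A usage sketch of the store/retrieve pair (a hedged example; `mat` is an assumed assembled
   matrix whose nonzero pattern is frozen so the stashed values stay aligned with the stored
   pattern):

     PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE));
     PetscCall(MatStoreValues(mat));    // stash the current numerical values
     // ... overwrite the matrix values, keeping the same nonzero pattern ...
     PetscCall(MatRetrieveValues(mat)); // restore the stashed values
*/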
2907 
2908 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2909 {
2910   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2911   PetscMPIInt size;
2912 
2913   PetscFunctionBegin;
2914   if (B->hash_active) {
2915     B->ops[0]      = b->cops;
2916     B->hash_active = PETSC_FALSE;
2917   }
2918   PetscCall(PetscLayoutSetUp(B->rmap));
2919   PetscCall(PetscLayoutSetUp(B->cmap));
2920 
2921 #if defined(PETSC_USE_CTABLE)
2922   PetscCall(PetscHMapIDestroy(&b->colmap));
2923 #else
2924   PetscCall(PetscFree(b->colmap));
2925 #endif
2926   PetscCall(PetscFree(b->garray));
2927   PetscCall(VecDestroy(&b->lvec));
2928   PetscCall(VecScatterDestroy(&b->Mvctx));
2929 
2930   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2931 
2932   MatSeqXAIJGetOptions_Private(b->B);
2933   PetscCall(MatDestroy(&b->B));
2934   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2935   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2936   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2937   PetscCall(MatSetType(b->B, MATSEQAIJ));
2938   MatSeqXAIJRestoreOptions_Private(b->B);
2939 
2940   MatSeqXAIJGetOptions_Private(b->A);
2941   PetscCall(MatDestroy(&b->A));
2942   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2943   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2944   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2945   PetscCall(MatSetType(b->A, MATSEQAIJ));
2946   MatSeqXAIJRestoreOptions_Private(b->A);
2947 
2948   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2949   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2950   B->preallocated  = PETSC_TRUE;
2951   B->was_assembled = PETSC_FALSE;
2952   B->assembled     = PETSC_FALSE;
2953   PetscFunctionReturn(PETSC_SUCCESS);
2954 }
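/*
   The public entry point MatMPIAIJSetPreallocation() dispatches to the routine above. A typical
   creation sequence (a sketch; M, N and the per-row estimates 5 and 2 are arbitrary):

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); // ~5 nnz/row in the diagonal block, ~2 in the off-diagonal block
*/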
2955 
2956 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2957 {
2958   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2959 
2960   PetscFunctionBegin;
2961   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2962   PetscCall(PetscLayoutSetUp(B->rmap));
2963   PetscCall(PetscLayoutSetUp(B->cmap));
2964   if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2965   else {
2966 #if defined(PETSC_USE_CTABLE)
2967     PetscCall(PetscHMapIDestroy(&b->colmap));
2968 #else
2969     PetscCall(PetscFree(b->colmap));
2970 #endif
2971     PetscCall(PetscFree(b->garray));
2972     PetscCall(VecDestroy(&b->lvec));
2973   }
2974   PetscCall(VecScatterDestroy(&b->Mvctx));
2975 
2976   PetscCall(MatResetPreallocation(b->A));
2977   PetscCall(MatResetPreallocation(b->B));
2978   B->preallocated  = PETSC_TRUE;
2979   B->was_assembled = PETSC_FALSE;
2980   B->assembled     = PETSC_FALSE;
2981   PetscFunctionReturn(PETSC_SUCCESS);
2982 }
2983 
2984 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2985 {
2986   Mat         mat;
2987   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2988 
2989   PetscFunctionBegin;
2990   *newmat = NULL;
2991   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2992   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2993   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2994   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2995   a = (Mat_MPIAIJ *)mat->data;
2996 
2997   mat->factortype = matin->factortype;
2998   mat->assembled  = matin->assembled;
2999   mat->insertmode = NOT_SET_VALUES;
3000 
3001   a->size         = oldmat->size;
3002   a->rank         = oldmat->rank;
3003   a->donotstash   = oldmat->donotstash;
3004   a->roworiented  = oldmat->roworiented;
3005   a->rowindices   = NULL;
3006   a->rowvalues    = NULL;
3007   a->getrowactive = PETSC_FALSE;
3008 
3009   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3010   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3011   if (matin->hash_active) {
3012     PetscCall(MatSetUp(mat));
3013   } else {
3014     mat->preallocated = matin->preallocated;
3015     if (oldmat->colmap) {
3016 #if defined(PETSC_USE_CTABLE)
3017       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3018 #else
3019       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3020       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3021 #endif
3022     } else a->colmap = NULL;
3023     if (oldmat->garray) {
3024       PetscInt len;
3025       len = oldmat->B->cmap->n;
3026       PetscCall(PetscMalloc1(len + 1, &a->garray));
3027       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3028     } else a->garray = NULL;
3029 
3030     /* MatDuplicate() may be called with a non-assembled matrix;
3031       in fact, MatDuplicate() only requires the matrix to be preallocated.
3032       This can happen, for example, inside a DMCreateMatrix_Shell */
3033     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3034     if (oldmat->Mvctx) {
3035       a->Mvctx = oldmat->Mvctx;
3036       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3037     }
3038     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3039     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3040   }
3041   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3042   *newmat = mat;
3043   PetscFunctionReturn(PETSC_SUCCESS);
3044 }
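/*
   A usage sketch (assuming an existing assembled MATMPIAIJ matrix `A`):

     Mat B;
     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); // same layout and nonzero pattern, values copied
*/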
3045 
3046 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3047 {
3048   PetscBool isbinary, ishdf5;
3049 
3050   PetscFunctionBegin;
3051   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3052   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3053   /* force binary viewer to load .info file if it has not yet done so */
3054   PetscCall(PetscViewerSetUp(viewer));
3055   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3056   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3057   if (isbinary) {
3058     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3059   } else if (ishdf5) {
3060 #if defined(PETSC_HAVE_HDF5)
3061     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3062 #else
3063     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3064 #endif
3065   } else {
3066     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3067   }
3068   PetscFunctionReturn(PETSC_SUCCESS);
3069 }
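/*
   A loading sketch (the file name "matrix.dat" is hypothetical; the file would have been written
   earlier by MatView() with a binary viewer):

     PetscViewer viewer;
     Mat         A;

     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/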
3070 
3071 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3072 {
3073   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3074   PetscInt    *rowidxs, *colidxs;
3075   PetscScalar *matvals;
3076 
3077   PetscFunctionBegin;
3078   PetscCall(PetscViewerSetUp(viewer));
3079 
3080   /* read in matrix header */
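  /* The binary stream read below consists of a 4-entry header (MAT_FILE_CLASSID, M, N, nz),
     followed by the M row lengths, then the nz column indices, then the nz scalar values */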
3081   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3082   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3083   M  = header[1];
3084   N  = header[2];
3085   nz = header[3];
3086   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3087   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3088   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3089 
3090   /* set block sizes from the viewer's .info file */
3091   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3092   /* set global sizes if not set already */
3093   if (mat->rmap->N < 0) mat->rmap->N = M;
3094   if (mat->cmap->N < 0) mat->cmap->N = N;
3095   PetscCall(PetscLayoutSetUp(mat->rmap));
3096   PetscCall(PetscLayoutSetUp(mat->cmap));
3097 
3098   /* check if the matrix sizes are correct */
3099   PetscCall(MatGetSize(mat, &rows, &cols));
3100   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3101 
3102   /* read in row lengths and build row indices */
3103   PetscCall(MatGetLocalSize(mat, &m, NULL));
3104   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3105   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3106   rowidxs[0] = 0;
3107   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
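  /* e.g. (hypothetical) local row lengths {2, 0, 3} become offsets rowidxs = {0, 2, 2, 5} */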
3108   if (nz != PETSC_INT_MAX) {
3109     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3110     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3111   }
3112 
3113   /* read in column indices and matrix values */
3114   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3115   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3116   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3117   /* store matrix indices and values */
3118   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3119   PetscCall(PetscFree(rowidxs));
3120   PetscCall(PetscFree2(colidxs, matvals));
3121   PetscFunctionReturn(PETSC_SUCCESS);
3122 }
3123 
3124 /* Not scalable because of ISAllGather() unless getting all columns. */
3125 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3126 {
3127   IS          iscol_local;
3128   PetscBool   isstride;
3129   PetscMPIInt lisstride = 0, gisstride;
3130 
3131   PetscFunctionBegin;
3132   /* check if we are grabbing all columns */
3133   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3134 
3135   if (isstride) {
3136     PetscInt start, len, mstart, mlen;
3137     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3138     PetscCall(ISGetLocalSize(iscol, &len));
3139     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3140     if (mstart == start && mlen - mstart == len) lisstride = 1;
3141   }
3142 
3143   PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3144   if (gisstride) {
3145     PetscInt N;
3146     PetscCall(MatGetSize(mat, NULL, &N));
3147     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3148     PetscCall(ISSetIdentity(iscol_local));
3149     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3150   } else {
3151     PetscInt cbs;
3152     PetscCall(ISGetBlockSize(iscol, &cbs));
3153     PetscCall(ISAllGather(iscol, &iscol_local));
3154     PetscCall(ISSetBlockSize(iscol_local, cbs));
3155   }
3156 
3157   *isseq = iscol_local;
3158   PetscFunctionReturn(PETSC_SUCCESS);
3159 }
3160 
3161 /*
3162  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid the ISAllGather() and the sequential iscol_local of global size
3163  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3164 
3165  Input Parameters:
3166 +   mat - matrix
3167 .   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3168            i.e., mat->rstart <= isrow[i] < mat->rend
3169 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3170            i.e., mat->cstart <= iscol[i] < mat->cend
3171 
3172  Output Parameters:
3173 +   isrow_d - sequential row index set for retrieving mat->A
3174 .   iscol_d - sequential column index set for retrieving mat->A
3175 .   iscol_o - sequential column index set for retrieving mat->B
3176 -   garray - column map; garray[i] indicates the global location of iscol_o[i] in `iscol`, i.e., its column index in the submatrix
3177  */
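/*
   For example (a hypothetical layout): rank 0 owns columns [0,4) of mat and selects iscol = {0, 3},
   rank 1 owns [4,8) and selects iscol = {5}. If rank 0's local rows have entries in global column 5,
   that column belongs to mat->B on rank 0, so iscol_o holds its local column index in B and
   garray = {2}: column 5 of mat becomes column 2 of the submatrix.
*/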
3178 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3179 {
3180   Vec             x, cmap;
3181   const PetscInt *is_idx;
3182   PetscScalar    *xarray, *cmaparray;
3183   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3184   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3185   Mat             B    = a->B;
3186   Vec             lvec = a->lvec, lcmap;
3187   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3188   MPI_Comm        comm;
3189   VecScatter      Mvctx = a->Mvctx;
3190 
3191   PetscFunctionBegin;
3192   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3193   PetscCall(ISGetLocalSize(iscol, &ncols));
3194 
3195   /* (1) iscol selects a subset of the columns of mat; embed it in a full-length column vector x whose selected entries hold their global column index and whose remaining entries are -1 */
3196   PetscCall(MatCreateVecs(mat, &x, NULL));
3197   PetscCall(VecSet(x, -1.0));
3198   PetscCall(VecDuplicate(x, &cmap));
3199   PetscCall(VecSet(cmap, -1.0));
3200 
3201   /* Get start indices */
3202   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3203   isstart -= ncols;
3204   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3205 
3206   PetscCall(ISGetIndices(iscol, &is_idx));
3207   PetscCall(VecGetArray(x, &xarray));
3208   PetscCall(VecGetArray(cmap, &cmaparray));
3209   PetscCall(PetscMalloc1(ncols, &idx));
3210   for (i = 0; i < ncols; i++) {
3211     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3212     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3213     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3214   }
3215   PetscCall(VecRestoreArray(x, &xarray));
3216   PetscCall(VecRestoreArray(cmap, &cmaparray));
3217   PetscCall(ISRestoreIndices(iscol, &is_idx));
3218 
3219   /* Get iscol_d */
3220   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3221   PetscCall(ISGetBlockSize(iscol, &i));
3222   PetscCall(ISSetBlockSize(*iscol_d, i));
3223 
3224   /* Get isrow_d */
3225   PetscCall(ISGetLocalSize(isrow, &m));
3226   rstart = mat->rmap->rstart;
3227   PetscCall(PetscMalloc1(m, &idx));
3228   PetscCall(ISGetIndices(isrow, &is_idx));
3229   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3230   PetscCall(ISRestoreIndices(isrow, &is_idx));
3231 
3232   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3233   PetscCall(ISGetBlockSize(isrow, &i));
3234   PetscCall(ISSetBlockSize(*isrow_d, i));
3235 
3236   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3237   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3238   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3239 
3240   PetscCall(VecDuplicate(lvec, &lcmap));
3241 
3242   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3243   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3244 
3245   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3246   /* off-process column indices */
3247   count = 0;
3248   PetscCall(PetscMalloc1(Bn, &idx));
3249   PetscCall(PetscMalloc1(Bn, &cmap1));
3250 
3251   PetscCall(VecGetArray(lvec, &xarray));
3252   PetscCall(VecGetArray(lcmap, &cmaparray));
3253   for (i = 0; i < Bn; i++) {
3254     if (PetscRealPart(xarray[i]) > -1.0) {
3255       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3256       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3257       count++;
3258     }
3259   }
3260   PetscCall(VecRestoreArray(lvec, &xarray));
3261   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3262 
3263   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3264   /* cannot ensure iscol_o has same blocksize as iscol! */
3265 
3266   PetscCall(PetscFree(idx));
3267   *garray = cmap1;
3268 
3269   PetscCall(VecDestroy(&x));
3270   PetscCall(VecDestroy(&cmap));
3271   PetscCall(VecDestroy(&lcmap));
3272   PetscFunctionReturn(PETSC_SUCCESS);
3273 }
3274 
3275 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3276 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3277 {
3278   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3279   Mat         M = NULL;
3280   MPI_Comm    comm;
3281   IS          iscol_d, isrow_d, iscol_o;
3282   Mat         Asub = NULL, Bsub = NULL;
3283   PetscInt    n;
3284 
3285   PetscFunctionBegin;
3286   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3287 
3288   if (call == MAT_REUSE_MATRIX) {
3289     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3290     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3291     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3292 
3293     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3294     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3295 
3296     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3297     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3298 
3299     /* Update diagonal and off-diagonal portions of submat */
3300     asub = (Mat_MPIAIJ *)(*submat)->data;
3301     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3302     PetscCall(ISGetLocalSize(iscol_o, &n));
3303     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3304     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3305     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3306 
3307   } else { /* call == MAT_INITIAL_MATRIX */
3308     PetscInt *garray;
3309     PetscInt  BsubN;
3310 
3311     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3312     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3313 
3314     /* Create local submatrices Asub and Bsub */
3315     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3316     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3317 
3318     /* Create submatrix M */
3319     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3320 
3321     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3322     asub = (Mat_MPIAIJ *)M->data;
3323 
3324     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3325     n = asub->B->cmap->N;
3326     if (BsubN > n) {
3327       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3328       const PetscInt *idx;
3329       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3330       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3331 
3332       PetscCall(PetscMalloc1(n, &idx_new));
3333       j = 0;
3334       PetscCall(ISGetIndices(iscol_o, &idx));
3335       for (i = 0; i < n; i++) {
3336         if (j >= BsubN) break;
3337         while (subgarray[i] > garray[j]) j++;
3338 
3339         if (subgarray[i] == garray[j]) {
3340           idx_new[i] = idx[j++];
3341         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3342       }
3343       PetscCall(ISRestoreIndices(iscol_o, &idx));
3344 
3345       PetscCall(ISDestroy(&iscol_o));
3346       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3347 
3348     } else if (BsubN < n) {
3349       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3350     }
3351 
3352     PetscCall(PetscFree(garray));
3353     *submat = M;
3354 
3355     /* Save isrow_d, iscol_d and iscol_o on this process for the next (reuse) request */
3356     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3357     PetscCall(ISDestroy(&isrow_d));
3358 
3359     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3360     PetscCall(ISDestroy(&iscol_d));
3361 
3362     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3363     PetscCall(ISDestroy(&iscol_o));
3364   }
3365   PetscFunctionReturn(PETSC_SUCCESS);
3366 }
3367 
3368 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3369 {
3370   IS        iscol_local = NULL, isrow_d;
3371   PetscInt  csize;
3372   PetscInt  n, i, j, start, end;
3373   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3374   MPI_Comm  comm;
3375 
3376   PetscFunctionBegin;
3377   /* If isrow has same processor distribution as mat,
3378      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3379   if (call == MAT_REUSE_MATRIX) {
3380     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3381     if (isrow_d) {
3382       sameRowDist  = PETSC_TRUE;
3383       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3384     } else {
3385       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3386       if (iscol_local) {
3387         sameRowDist  = PETSC_TRUE;
3388         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3389       }
3390     }
3391   } else {
3392     /* Check if isrow has same processor distribution as mat */
3393     sameDist[0] = PETSC_FALSE;
3394     PetscCall(ISGetLocalSize(isrow, &n));
3395     if (!n) {
3396       sameDist[0] = PETSC_TRUE;
3397     } else {
3398       PetscCall(ISGetMinMax(isrow, &i, &j));
3399       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3400       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3401     }
3402 
3403     /* Check if iscol has same processor distribution as mat */
3404     sameDist[1] = PETSC_FALSE;
3405     PetscCall(ISGetLocalSize(iscol, &n));
3406     if (!n) {
3407       sameDist[1] = PETSC_TRUE;
3408     } else {
3409       PetscCall(ISGetMinMax(iscol, &i, &j));
3410       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3411       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3412     }
3413 
3414     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3415     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3416     sameRowDist = tsameDist[0];
3417   }
3418 
3419   if (sameRowDist) {
3420     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3421       /* isrow and iscol have same processor distribution as mat */
3422       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3423       PetscFunctionReturn(PETSC_SUCCESS);
3424     } else { /* sameRowDist */
3425       /* isrow has same processor distribution as mat */
3426       if (call == MAT_INITIAL_MATRIX) {
3427         PetscBool sorted;
3428         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3429         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3430         PetscCall(ISGetSize(iscol, &i));
3431         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3432 
3433         PetscCall(ISSorted(iscol_local, &sorted));
3434         if (sorted) {
3435           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3436           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3437           PetscFunctionReturn(PETSC_SUCCESS);
3438         }
3439       } else { /* call == MAT_REUSE_MATRIX */
3440         IS iscol_sub;
3441         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3442         if (iscol_sub) {
3443           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3444           PetscFunctionReturn(PETSC_SUCCESS);
3445         }
3446       }
3447     }
3448   }
3449 
3450   /* General case: iscol -> iscol_local which has global size of iscol */
3451   if (call == MAT_REUSE_MATRIX) {
3452     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3453     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3454   } else {
3455     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3456   }
3457 
3458   PetscCall(ISGetLocalSize(iscol, &csize));
3459   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3460 
3461   if (call == MAT_INITIAL_MATRIX) {
3462     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3463     PetscCall(ISDestroy(&iscol_local));
3464   }
3465   PetscFunctionReturn(PETSC_SUCCESS);
3466 }
3467 
3468 /*@C
3469   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3470   and "off-diagonal" part of the matrix in CSR format.
3471 
3472   Collective
3473 
3474   Input Parameters:
3475 + comm   - MPI communicator
3476 . A      - "diagonal" portion of matrix
3477 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3478 - garray - global index of `B` columns
3479 
3480   Output Parameter:
3481 . mat - the matrix, with input `A` as its local diagonal matrix
3482 
3483   Level: advanced
3484 
3485   Notes:
3486   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3487 
3488   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3489 
3490 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3491 @*/
3492 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3493 {
3494   Mat_MPIAIJ        *maij;
3495   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3496   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3497   const PetscScalar *oa;
3498   Mat                Bnew;
3499   PetscInt           m, n, N;
3500   MatType            mpi_mat_type;
3501 
3502   PetscFunctionBegin;
3503   PetscCall(MatCreate(comm, mat));
3504   PetscCall(MatGetSize(A, &m, &n));
3505   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3506   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3507   /* the check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3508   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3509 
3510   /* Get global columns of mat */
3511   PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3512 
3513   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3514   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3515   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3516   PetscCall(MatSetType(*mat, mpi_mat_type));
3517 
3518   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3519   maij = (Mat_MPIAIJ *)(*mat)->data;
3520 
3521   (*mat)->preallocated = PETSC_TRUE;
3522 
3523   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3524   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3525 
3526   /* Set A as diagonal portion of *mat */
3527   maij->A = A;
3528 
3529   nz = oi[m];
3530   for (i = 0; i < nz; i++) {
3531     col   = oj[i];
3532     oj[i] = garray[col];
3533   }
3534 
3535   /* Set Bnew as off-diagonal portion of *mat */
3536   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3537   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3538   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3539   bnew        = (Mat_SeqAIJ *)Bnew->data;
3540   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3541   maij->B     = Bnew;
3542 
3543   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3544 
3545   b->free_a  = PETSC_FALSE;
3546   b->free_ij = PETSC_FALSE;
3547   PetscCall(MatDestroy(&B));
3548 
3549   bnew->free_a  = PETSC_TRUE;
3550   bnew->free_ij = PETSC_TRUE;
3551 
3552   /* condense columns of maij->B */
3553   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3554   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3555   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3556   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3557   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3558   PetscFunctionReturn(PETSC_SUCCESS);
3559 }
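/*
   A construction sketch (hypothetical: 2 MPI ranks, a 4 x 4 matrix with 2 rows and 2 columns per
   rank, and a single off-process column per rank; none of these variables exist elsewhere in PETSc):

     Mat         A, B, M;
     PetscInt    garray[1];
     PetscMPIInt rank;

     PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
     PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 2, 2, 2, NULL, &A)); // local "diagonal" block
     PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 2, 1, 1, NULL, &B)); // "off-diagonal" block, compressed to one column
     // ... fill A and B with MatSetValues() and assemble both ...
     garray[0] = rank ? 1 : 2; // global column represented by the single column of B
     PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, A, B, garray, &M));
     // A and B now belong to M; the caller must not use or destroy them
*/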
3560 
3561 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3562 
3563 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3564 {
3565   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3566   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3567   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3568   Mat             M, Msub, B = a->B;
3569   MatScalar      *aa;
3570   Mat_SeqAIJ     *aij;
3571   PetscInt       *garray = a->garray, *colsub, Ncols;
3572   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3573   IS              iscol_sub, iscmap;
3574   const PetscInt *is_idx, *cmap;
3575   PetscBool       allcolumns = PETSC_FALSE;
3576   MPI_Comm        comm;
3577 
3578   PetscFunctionBegin;
3579   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3580   if (call == MAT_REUSE_MATRIX) {
3581     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3582     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3583     PetscCall(ISGetLocalSize(iscol_sub, &count));
3584 
3585     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3586     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3587 
3588     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3589     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3590 
3591     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3592 
3593   } else { /* call == MAT_INITIAL_MATRIX */
3594     PetscBool flg;
3595 
3596     PetscCall(ISGetLocalSize(iscol, &n));
3597     PetscCall(ISGetSize(iscol, &Ncols));
3598 
3599     /* (1) iscol -> nonscalable iscol_local */
3600     /* Check for special case: each processor gets entire matrix columns */
3601     PetscCall(ISIdentity(iscol_local, &flg));
3602     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3603     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3604     if (allcolumns) {
3605       iscol_sub = iscol_local;
3606       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3607       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3608 
3609     } else {
3610       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3611       PetscInt *idx, *cmap1, k;
3612       PetscCall(PetscMalloc1(Ncols, &idx));
3613       PetscCall(PetscMalloc1(Ncols, &cmap1));
3614       PetscCall(ISGetIndices(iscol_local, &is_idx));
3615       count = 0;
3616       k     = 0;
3617       for (i = 0; i < Ncols; i++) {
3618         j = is_idx[i];
3619         if (j >= cstart && j < cend) {
3620           /* diagonal part of mat */
3621           idx[count]     = j;
3622           cmap1[count++] = i; /* column index in submat */
3623         } else if (Bn) {
3624           /* off-diagonal part of mat */
3625           if (j == garray[k]) {
3626             idx[count]     = j;
3627             cmap1[count++] = i; /* column index in submat */
3628           } else if (j > garray[k]) {
3629             while (j > garray[k] && k < Bn - 1) k++;
3630             if (j == garray[k]) {
3631               idx[count]     = j;
3632               cmap1[count++] = i; /* column index in submat */
3633             }
3634           }
3635         }
3636       }
3637       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3638 
3639       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3640       PetscCall(ISGetBlockSize(iscol, &cbs));
3641       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3642 
3643       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3644     }
3645 
3646     /* (3) Create sequential Msub */
3647     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3648   }
3649 
3650   PetscCall(ISGetLocalSize(iscol_sub, &count));
3651   aij = (Mat_SeqAIJ *)Msub->data;
3652   ii  = aij->i;
3653   PetscCall(ISGetIndices(iscmap, &cmap));
3654 
3655   /*
3656       m - number of local rows
3657       Ncols - number of columns (same on all processors)
3658       rstart - first row in new global matrix generated
3659   */
3660   PetscCall(MatGetSize(Msub, &m, NULL));
3661 
3662   if (call == MAT_INITIAL_MATRIX) {
3663     /* (4) Create parallel newmat */
3664     PetscMPIInt rank, size;
3665     PetscInt    csize;
3666 
3667     PetscCallMPI(MPI_Comm_size(comm, &size));
3668     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3669 
3670     /*
3671         Determine the number of non-zeros in the diagonal and off-diagonal
3672         portions of the matrix in order to do correct preallocation
3673     */
3674 
3675     /* first get start and end of "diagonal" columns */
3676     PetscCall(ISGetLocalSize(iscol, &csize));
3677     if (csize == PETSC_DECIDE) {
3678       PetscCall(ISGetSize(isrow, &mglobal));
3679       if (mglobal == Ncols) { /* square matrix */
3680         nlocal = m;
3681       } else {
3682         nlocal = Ncols / size + ((Ncols % size) > rank);
3683       }
3684     } else {
3685       nlocal = csize;
3686     }
3687     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3688     rstart = rend - nlocal;
3689     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3690 
3691     /* next, compute all the lengths */
3692     jj = aij->j;
3693     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3694     olens = dlens + m;
3695     for (i = 0; i < m; i++) {
3696       jend = ii[i + 1] - ii[i];
3697       olen = 0;
3698       dlen = 0;
3699       for (j = 0; j < jend; j++) {
3700         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3701         else dlen++;
3702         jj++;
3703       }
3704       olens[i] = olen;
3705       dlens[i] = dlen;
3706     }
3707 
3708     PetscCall(ISGetBlockSize(isrow, &bs));
3709     PetscCall(ISGetBlockSize(iscol, &cbs));
3710 
3711     PetscCall(MatCreate(comm, &M));
3712     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3713     PetscCall(MatSetBlockSizes(M, bs, cbs));
3714     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3715     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3716     PetscCall(PetscFree(dlens));
3717 
3718   } else { /* call == MAT_REUSE_MATRIX */
3719     M = *newmat;
3720     PetscCall(MatGetLocalSize(M, &i, NULL));
3721     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3722     PetscCall(MatZeroEntries(M));
3723     /*
3724          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3725        rather than the slower MatSetValues().
3726     */
3727     M->was_assembled = PETSC_TRUE;
3728     M->assembled     = PETSC_FALSE;
3729   }
3730 
3731   /* (5) Set values of Msub to *newmat */
3732   PetscCall(PetscMalloc1(count, &colsub));
3733   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3734 
3735   jj = aij->j;
3736   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3737   for (i = 0; i < m; i++) {
3738     row = rstart + i;
3739     nz  = ii[i + 1] - ii[i];
3740     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3741     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3742     jj += nz;
3743     aa += nz;
3744   }
3745   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3746   PetscCall(ISRestoreIndices(iscmap, &cmap));
3747 
3748   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3749   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3750 
3751   PetscCall(PetscFree(colsub));
3752 
3753   /* save Msub, iscol_sub and iscmap on this process for the next (reuse) request */
3754   if (call == MAT_INITIAL_MATRIX) {
3755     *newmat = M;
3756     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3757     PetscCall(MatDestroy(&Msub));
3758 
3759     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3760     PetscCall(ISDestroy(&iscol_sub));
3761 
3762     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3763     PetscCall(ISDestroy(&iscmap));
3764 
3765     if (iscol_local) {
3766       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3767       PetscCall(ISDestroy(&iscol_local));
3768     }
3769   }
3770   PetscFunctionReturn(PETSC_SUCCESS);
3771 }
3772 
3773 /*
3774     Not great since it makes two copies of the submatrix: first a SeqAIJ copy on each
3775   process, and then the end result by concatenating those local matrices.
3776   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3777 
3778   This requires a sequential iscol containing all of the requested column indices.
3779 */
3780 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3781 {
3782   PetscMPIInt rank, size;
3783   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3784   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3785   Mat         M, Mreuse;
3786   MatScalar  *aa, *vwork;
3787   MPI_Comm    comm;
3788   Mat_SeqAIJ *aij;
3789   PetscBool   colflag, allcolumns = PETSC_FALSE;
3790 
3791   PetscFunctionBegin;
3792   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3793   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3794   PetscCallMPI(MPI_Comm_size(comm, &size));
3795 
3796   /* Check for special case: each processor gets entire matrix columns */
3797   PetscCall(ISIdentity(iscol, &colflag));
3798   PetscCall(ISGetLocalSize(iscol, &n));
3799   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3800   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3801 
3802   if (call == MAT_REUSE_MATRIX) {
3803     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3804     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3805     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3806   } else {
3807     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3808   }
3809 
3810   /*
3811       m - number of local rows
3812       n - number of columns (same on all processors)
3813       rstart - first row in new global matrix generated
3814   */
3815   PetscCall(MatGetSize(Mreuse, &m, &n));
3816   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3817   if (call == MAT_INITIAL_MATRIX) {
3818     aij = (Mat_SeqAIJ *)Mreuse->data;
3819     ii  = aij->i;
3820     jj  = aij->j;
3821 
3822     /*
3823         Determine the number of non-zeros in the diagonal and off-diagonal
3824         portions of the matrix in order to do correct preallocation
3825     */
3826 
3827     /* first get start and end of "diagonal" columns */
3828     if (csize == PETSC_DECIDE) {
3829       PetscCall(ISGetSize(isrow, &mglobal));
3830       if (mglobal == n) { /* square matrix */
3831         nlocal = m;
3832       } else {
3833         nlocal = n / size + ((n % size) > rank);
3834       }
3835     } else {
3836       nlocal = csize;
3837     }
3838     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3839     rstart = rend - nlocal;
3840     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3841 
3842     /* next, compute all the lengths */
3843     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3844     olens = dlens + m;
3845     for (i = 0; i < m; i++) {
3846       jend = ii[i + 1] - ii[i];
3847       olen = 0;
3848       dlen = 0;
3849       for (j = 0; j < jend; j++) {
3850         if (*jj < rstart || *jj >= rend) olen++;
3851         else dlen++;
3852         jj++;
3853       }
3854       olens[i] = olen;
3855       dlens[i] = dlen;
3856     }
3857     PetscCall(MatCreate(comm, &M));
3858     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3859     PetscCall(MatSetBlockSizes(M, bs, cbs));
3860     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3861     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3862     PetscCall(PetscFree(dlens));
3863   } else {
3864     PetscInt ml, nl;
3865 
3866     M = *newmat;
3867     PetscCall(MatGetLocalSize(M, &ml, &nl));
3868     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3869     PetscCall(MatZeroEntries(M));
3870     /*
3871          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3872        rather than the slower MatSetValues().
3873     */
3874     M->was_assembled = PETSC_TRUE;
3875     M->assembled     = PETSC_FALSE;
3876   }
3877   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3878   aij = (Mat_SeqAIJ *)Mreuse->data;
3879   ii  = aij->i;
3880   jj  = aij->j;
3881 
3882   /* trigger copy to CPU if needed */
3883   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3884   for (i = 0; i < m; i++) {
3885     row   = rstart + i;
3886     nz    = ii[i + 1] - ii[i];
3887     cwork = jj;
3888     jj    = PetscSafePointerPlusOffset(jj, nz);
3889     vwork = aa;
3890     aa    = PetscSafePointerPlusOffset(aa, nz);
3891     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3892   }
3893   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3894 
3895   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3896   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3897   *newmat = M;
3898 
3899   /* save submatrix used in processor for next request */
3900   if (call == MAT_INITIAL_MATRIX) {
3901     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3902     PetscCall(MatDestroy(&Mreuse));
3903   }
3904   PetscFunctionReturn(PETSC_SUCCESS);
3905 }
3906 
3907 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3908 {
3909   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3910   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3911   const PetscInt *JJ;
3912   PetscBool       nooffprocentries;
3913   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3914 
3915   PetscFunctionBegin;
3916   PetscCall(PetscLayoutSetUp(B->rmap));
3917   PetscCall(PetscLayoutSetUp(B->cmap));
3918   m       = B->rmap->n;
3919   cstart  = B->cmap->rstart;
3920   cend    = B->cmap->rend;
3921   rstart  = B->rmap->rstart;
3922   irstart = Ii[0];
3923 
3924   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3925 
3926   if (PetscDefined(USE_DEBUG)) {
3927     for (i = 0; i < m; i++) {
3928       nnz = Ii[i + 1] - Ii[i];
3929       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3930       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3931       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3932       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3933     }
3934   }
3935 
3936   for (i = 0; i < m; i++) {
3937     nnz     = Ii[i + 1] - Ii[i];
3938     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3939     nnz_max = PetscMax(nnz_max, nnz);
3940     d       = 0;
3941     for (j = 0; j < nnz; j++) {
3942       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3943     }
3944     d_nnz[i] = d;
3945     o_nnz[i] = nnz - d;
3946   }
3947   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3948   PetscCall(PetscFree2(d_nnz, o_nnz));
3949 
3950   for (i = 0; i < m; i++) {
3951     ii = i + rstart;
3952     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3953   }
3954   nooffprocentries    = B->nooffprocentries;
3955   B->nooffprocentries = PETSC_TRUE;
3956   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3957   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3958   B->nooffprocentries = nooffprocentries;
3959 
3960   /* count number of entries below block diagonal */
3961   PetscCall(PetscFree(Aij->ld));
3962   PetscCall(PetscCalloc1(m, &ld));
3963   Aij->ld = ld;
3964   for (i = 0; i < m; i++) {
3965     nnz = Ii[i + 1] - Ii[i];
3966     j   = 0;
3967     while (j < nnz && J[j] < cstart) j++;
3968     ld[i] = j;
3969     if (J) J += nnz;
3970   }
3971 
3972   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3973   PetscFunctionReturn(PETSC_SUCCESS);
3974 }
3975 
3976 /*@
3977   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3978   (the default parallel PETSc format).
3979 
3980   Collective
3981 
3982   Input Parameters:
3983 + B - the matrix
3984 . i - the indices into `j` for the start of each local row (indices start with zero)
3985 . j - the column indices for each local row (indices start with zero)
3986 - v - optional values in the matrix
3987 
3988   Level: developer
3989 
3990   Notes:
3991   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3992   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3993   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3994 
3995   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3996 
3997   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3998 
3999   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
4000 
4001   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
4002   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4003 
4004   The format used for the sparse matrix input is equivalent to a
4005   row-major ordering, i.e., for the following matrix, the input data expected is
4006   as shown
4007 .vb
4008         1 0 0
4009         2 0 3     P0
4010        -------
4011         4 5 6     P1
4012 
4013      Process0 [P0] rows_owned=[0,1]
4014         i =  {0,1,3}  [size = nrow+1  = 2+1]
4015         j =  {0,0,2}  [size = 3]
4016         v =  {1,2,3}  [size = 3]
4017 
4018      Process1 [P1] rows_owned=[2]
4019         i =  {0,3}    [size = nrow+1  = 1+1]
4020         j =  {0,1,2}  [size = 3]
4021         v =  {4,5,6}  [size = 3]
4022 .ve
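
  For instance, a minimal sketch of how two MPI ranks could provide the layout above (assuming exactly two
  ranks; error checking is omitted and the arrays are illustrative) is
.vb
     Mat         B;
     PetscMPIInt rank;
     PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2}; /* CSR data of P0 above */
     PetscScalar v0[] = {1, 2, 3};
     PetscInt    i1[] = {0, 3},    j1[] = {0, 1, 2}; /* CSR data of P1 above */
     PetscScalar v1[] = {4, 5, 6};

     MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
     MatCreate(PETSC_COMM_WORLD, &B);
     MatSetSizes(B, rank ? 1 : 2, PETSC_DECIDE, 3, 3);  /* rank 0 owns rows 0-1, rank 1 owns row 2 */
     MatSetType(B, MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B, rank ? i1 : i0, rank ? j1 : j0, rank ? v1 : v0);
.ve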
4023 
4024 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4025           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4026 @*/
4027 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4028 {
4029   PetscFunctionBegin;
4030   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4031   PetscFunctionReturn(PETSC_SUCCESS);
4032 }
4033 
4034 /*@
4035   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4036   (the default parallel PETSc format).  For good matrix assembly performance
4037   the user should preallocate the matrix storage by setting the parameters
4038   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4039 
4040   Collective
4041 
4042   Input Parameters:
4043 + B     - the matrix
4044 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4045            (same value is used for all local rows)
4046 . d_nnz - array containing the number of nonzeros in the various rows of the
4047            DIAGONAL portion of the local submatrix (possibly different for each row)
4048            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4049            The size of this array is equal to the number of local rows, i.e., 'm'.
4050            For matrices that will be factored, you must leave room for (and set)
4051            the diagonal entry even if it is zero.
4052 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4053            submatrix (same value is used for all local rows).
4054 - o_nnz - array containing the number of nonzeros in the various rows of the
4055            OFF-DIAGONAL portion of the local submatrix (possibly different for
4056            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4057            structure. The size of this array is equal to the number
4058            of local rows, i.e., 'm'.
4059 
4060   Example Usage:
4061   Consider the following 8x8 matrix with 34 non-zero values, that is
4062   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4063   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4064   as follows
4065 
4066 .vb
4067             1  2  0  |  0  3  0  |  0  4
4068     Proc0   0  5  6  |  7  0  0  |  8  0
4069             9  0 10  | 11  0  0  | 12  0
4070     -------------------------------------
4071            13  0 14  | 15 16 17  |  0  0
4072     Proc1   0 18  0  | 19 20 21  |  0  0
4073             0  0  0  | 22 23  0  | 24  0
4074     -------------------------------------
4075     Proc2  25 26 27  |  0  0 28  | 29  0
4076            30  0  0  | 31 32 33  |  0 34
4077 .ve
4078 
4079   This can be represented as a collection of submatrices as
4080 .vb
4081       A B C
4082       D E F
4083       G H I
4084 .ve
4085 
4086   Where the submatrices A,B,C are owned by proc0, D,E,F are
4087   owned by proc1, G,H,I are owned by proc2.
4088 
4089   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4090   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4091   The 'M','N' parameters are 8,8, and have the same values on all procs.
4092 
4093   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4094   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4095   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4096   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4097   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4098   matrix, and [DF] as another `MATSEQAIJ` matrix.
4099 
4100   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4101   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4102   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4103   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4104   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4105   In this case, the values of `d_nz`, `o_nz` are
4106 .vb
4107      proc0  dnz = 2, o_nz = 2
4108      proc1  dnz = 3, o_nz = 2
4109      proc2  dnz = 1, o_nz = 4
4110 .ve
4111   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4112   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4113   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4114   34 values.
4115 
4116   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4117   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4118   In the above case the values for `d_nnz`, `o_nnz` are
4119 .vb
4120      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4121      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4122      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4123 .ve
4124   Here the space allocated is the sum of all the above values, i.e., 34, and
4125   hence pre-allocation is perfect.
4126 
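  For example, a minimal sketch of the calls that proc0 in the example above could make (proc1 and proc2
  pass their own local sizes and arrays; the subsequent MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd()
  calls are omitted) is
.vb
     Mat      A;
     PetscInt d_nnz[] = {2, 2, 2}, o_nnz[] = {2, 2, 2}; /* the three rows owned by proc0 */

     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetSizes(A, 3, 3, 8, 8);  /* proc0 owns 3 rows and 3 "diagonal" columns */
     MatSetType(A, MATMPIAIJ);
     MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz);
.ve
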
4127   Level: intermediate
4128 
4129   Notes:
4130   If the *_nnz parameter is given then the *_nz parameter is ignored
4131 
4132   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4133   storage.  The stored row and column indices begin with zero.
4134   See [Sparse Matrices](sec_matsparse) for details.
4135 
4136   The parallel matrix is partitioned such that the first m0 rows belong to
4137   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4138   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4139 
4140   The DIAGONAL portion of the local submatrix of a processor can be defined
4141   as the submatrix which is obtained by extracting the part corresponding to
4142   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4143   first row that belongs to the processor, r2 is the last row belonging to
4144   this processor, and c1-c2 is the range of indices of the local part of a
4145   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4146   common case of a square matrix, the row and column ranges are the same and
4147   the DIAGONAL part is also square. The remaining portion of the local
4148   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4149 
4150   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4151 
4152   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4153   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4154   You can also run with the option `-info` and look for messages with the string
4155   malloc in them to see if additional memory allocation was needed.
4156 
4157 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4158           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4159 @*/
4160 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4161 {
4162   PetscFunctionBegin;
4163   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4164   PetscValidType(B, 1);
4165   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4166   PetscFunctionReturn(PETSC_SUCCESS);
4167 }
4168 
4169 /*@
4170   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4171   CSR format.
4172 
4173   Collective
4174 
4175   Input Parameters:
4176 + comm - MPI communicator
4177 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4178 . n    - This value should be the same as the local size used in creating the
4179          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
4180          calculated if `N` is given). For square matrices n is almost always `m`.
4181 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4182 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4183 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4184 . j    - global column indices
4185 - a    - optional matrix values
4186 
4187   Output Parameter:
4188 . mat - the matrix
4189 
4190   Level: intermediate
4191 
4192   Notes:
4193   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4194   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4195   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4196 
4197   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4198 
4199   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`.
4200 
4201   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4202   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4203 
4204   The format which is used for the sparse matrix input, is equivalent to a
4205   row-major ordering, i.e., for the following matrix, the input data expected is
4206   as shown
4207 .vb
4208         1 0 0
4209         2 0 3     P0
4210        -------
4211         4 5 6     P1
4212 
4213      Process0 [P0] rows_owned=[0,1]
4214         i =  {0,1,3}  [size = nrow+1  = 2+1]
4215         j =  {0,0,2}  [size = 3]
4216         v =  {1,2,3}  [size = 3]
4217 
4218      Process1 [P1] rows_owned=[2]
4219         i =  {0,3}    [size = nrow+1  = 1+1]
4220         j =  {0,1,2}  [size = 3]
4221         v =  {4,5,6}  [size = 3]
4222 .ve
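
  A minimal sketch of the corresponding calls for the two-process layout above (assuming exactly two MPI
  ranks; error checking is omitted and the arrays are illustrative) is
.vb
     Mat         A;
     PetscMPIInt rank;
     PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2}; /* CSR data of P0 above */
     PetscScalar v0[] = {1, 2, 3};
     PetscInt    i1[] = {0, 3},    j1[] = {0, 1, 2}; /* CSR data of P1 above */
     PetscScalar v1[] = {4, 5, 6};

     MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, rank ? 1 : 2, PETSC_DECIDE, 3, 3,
                               rank ? i1 : i0, rank ? j1 : j0, rank ? v1 : v0, &A);
.ve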
4223 
4224 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4225           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4226 @*/
4227 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4228 {
4229   PetscFunctionBegin;
4230   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4231   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4232   PetscCall(MatCreate(comm, mat));
4233   PetscCall(MatSetSizes(*mat, m, n, M, N));
4234   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4235   PetscCall(MatSetType(*mat, MATMPIAIJ));
4236   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4237   PetscFunctionReturn(PETSC_SUCCESS);
4238 }
4239 
4240 /*@
4241   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4242   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4243   to `MatCreateMPIAIJWithArrays()`
4244 
4245   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4246 
4247   Collective
4248 
4249   Input Parameters:
4250 + mat - the matrix
4251 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4252 . n   - This value should be the same as the local size used in creating the
4253        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4254        calculated if N is given). For square matrices n is almost always m.
4255 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4256 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4257 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4258 . J   - column indices
4259 - v   - matrix values
4260 
4261   Level: deprecated
4262 
4263 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4264           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4265 @*/
4266 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4267 {
4268   PetscInt        nnz, i;
4269   PetscBool       nooffprocentries;
4270   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4271   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4272   PetscScalar    *ad, *ao;
4273   PetscInt        ldi, Iii, md;
4274   const PetscInt *Adi = Ad->i;
4275   PetscInt       *ld  = Aij->ld;
4276 
4277   PetscFunctionBegin;
4278   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4279   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4280   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4281   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4282 
4283   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4284   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4285 
4286   for (i = 0; i < m; i++) {
4287     if (PetscDefined(USE_DEBUG)) {
4288       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4289         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4290         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4291       }
4292     }
4293     nnz = Ii[i + 1] - Ii[i];
4294     Iii = Ii[i];
4295     ldi = ld[i];
4296     md  = Adi[i + 1] - Adi[i];
4297     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4298     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4299     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4300     ad += md;
4301     ao += nnz - md;
4302   }
4303   nooffprocentries      = mat->nooffprocentries;
4304   mat->nooffprocentries = PETSC_TRUE;
4305   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4306   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4307   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4308   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4309   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4310   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4311   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4312   mat->nooffprocentries = nooffprocentries;
4313   PetscFunctionReturn(PETSC_SUCCESS);
4314 }
4315 
4316 /*@
4317   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4318 
4319   Collective
4320 
4321   Input Parameters:
4322 + mat - the matrix
4323 - v   - matrix values, stored by row
4324 
4325   Level: intermediate
4326 
4327   Notes:
4328   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4329 
4330   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4331 
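  A sketch of typical use, where `comm`, `m`, `n`, `M`, `N`, `i`, `j`, and `v` stand for the CSR creation
  data described in `MatCreateMPIAIJWithArrays()` and `vnew` holds replacement values in the same
  row-by-row order, is
.vb
     Mat A;

     MatCreateMPIAIJWithArrays(comm, m, n, M, N, i, j, v, &A);
     /* ... later: same nonzero pattern, new numerical values ... */
     MatUpdateMPIAIJWithArray(A, vnew);
.ve
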
4332 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4333           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4334 @*/
4335 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4336 {
4337   PetscInt        nnz, i, m;
4338   PetscBool       nooffprocentries;
4339   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4340   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4341   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4342   PetscScalar    *ad, *ao;
4343   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4344   PetscInt        ldi, Iii, md;
4345   PetscInt       *ld = Aij->ld;
4346 
4347   PetscFunctionBegin;
4348   m = mat->rmap->n;
4349 
4350   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4351   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4352   Iii = 0;
4353   for (i = 0; i < m; i++) {
4354     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4355     ldi = ld[i];
4356     md  = Adi[i + 1] - Adi[i];
4357     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4358     ad += md;
4359     if (ao) {
4360       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4361       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4362       ao += nnz - md;
4363     }
4364     Iii += nnz;
4365   }
4366   nooffprocentries      = mat->nooffprocentries;
4367   mat->nooffprocentries = PETSC_TRUE;
4368   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4369   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4370   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4371   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4372   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4373   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4374   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4375   mat->nooffprocentries = nooffprocentries;
4376   PetscFunctionReturn(PETSC_SUCCESS);
4377 }
4378 
4379 /*@
4380   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4381   (the default parallel PETSc format).  For good matrix assembly performance
4382   the user should preallocate the matrix storage by setting the parameters
4383   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4384 
4385   Collective
4386 
4387   Input Parameters:
4388 + comm  - MPI communicator
4389 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4390           This value should be the same as the local size used in creating the
4391           y vector for the matrix-vector product y = Ax.
4392 . n     - This value should be the same as the local size used in creating the
4393           x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4394           calculated if N is given). For square matrices n is almost always m.
4395 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4396 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4397 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4398           (same value is used for all local rows)
4399 . d_nnz - array containing the number of nonzeros in the various rows of the
4400           DIAGONAL portion of the local submatrix (possibly different for each row)
4401           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4402           The size of this array is equal to the number of local rows, i.e., 'm'.
4403 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4404           submatrix (same value is used for all local rows).
4405 - o_nnz - array containing the number of nonzeros in the various rows of the
4406           OFF-DIAGONAL portion of the local submatrix (possibly different for
4407           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4408           structure. The size of this array is equal to the number
4409           of local rows, i.e., 'm'.
4410 
4411   Output Parameter:
4412 . A - the matrix
4413 
4414   Options Database Keys:
4415 + -mat_no_inode                     - Do not use inodes
4416 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4417 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4418                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4419                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4420 
4421   Level: intermediate
4422 
4423   Notes:
4424   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4425   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4426   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4427 
4428   If the *_nnz parameter is given then the *_nz parameter is ignored
4429 
4430   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4431   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4432   storage requirements for this matrix.
4433 
4434   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4435   processor then it must be used on all processors that share the object for
4436   that argument.
4437 
4438   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4439   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4440 
4441   The user MUST specify either the local or global matrix dimensions
4442   (possibly both).
4443 
4444   The parallel matrix is partitioned across processors such that the
4445   first `m0` rows belong to process 0, the next `m1` rows belong to
4446   process 1, the next `m2` rows belong to process 2, etc., where
4447   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4448   values corresponding to an [m x N] submatrix.
4449 
4450   The columns are logically partitioned with the n0 columns belonging
4451   to the 0th partition, the next n1 columns belonging to the next
4452   partition, etc., where n0,n1,n2... are the input parameter 'n'.
4453 
4454   The DIAGONAL portion of the local submatrix on any given processor
4455   is the submatrix corresponding to the rows and columns m,n
4456   owned by the given processor, i.e., the diagonal matrix on
4457   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4458   etc. The remaining portion of the local submatrix [m x (N-n)]
4459   constitutes the OFF-DIAGONAL portion. The example below better
4460   illustrates this concept. The two matrices, the DIAGONAL portion and
4461   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4462 
4463   For a square global matrix we define each processor's diagonal portion
4464   to be its local rows and the corresponding columns (a square submatrix);
4465   each processor's off-diagonal portion encompasses the remainder of the
4466   local matrix (a rectangular submatrix).
4467 
4468   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4469 
4470   When calling this routine with a single process communicator, a matrix of
4471   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4472   type of communicator, use the construction mechanism
4473 .vb
4474   MatCreate(..., &A);
4475   MatSetType(A, MATMPIAIJ);
4476   MatSetSizes(A, m, n, M, N);
4477   MatMPIAIJSetPreallocation(A, ...);
4478 .ve
4479 
4480   By default, this format uses inodes (identical nodes) when possible.
4481   We search for consecutive rows with the same nonzero structure, thereby
4482   reusing matrix information to achieve increased efficiency.
4483 
4484   Example Usage:
4485   Consider the following 8x8 matrix with 34 non-zero values, that is
4486   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4487   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4488   as follows
4489 
4490 .vb
4491             1  2  0  |  0  3  0  |  0  4
4492     Proc0   0  5  6  |  7  0  0  |  8  0
4493             9  0 10  | 11  0  0  | 12  0
4494     -------------------------------------
4495            13  0 14  | 15 16 17  |  0  0
4496     Proc1   0 18  0  | 19 20 21  |  0  0
4497             0  0  0  | 22 23  0  | 24  0
4498     -------------------------------------
4499     Proc2  25 26 27  |  0  0 28  | 29  0
4500            30  0  0  | 31 32 33  |  0 34
4501 .ve
4502 
4503   This can be represented as a collection of submatrices as
4504 
4505 .vb
4506       A B C
4507       D E F
4508       G H I
4509 .ve
4510 
4511   Where the submatrices A,B,C are owned by proc0, D,E,F are
4512   owned by proc1, G,H,I are owned by proc2.
4513 
4514   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4515   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4516   The 'M','N' parameters are 8,8, and have the same values on all procs.
4517 
4518   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4519   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4520   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4521   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4522   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4523   matrix, and [DF] as another `MATSEQAIJ` matrix.
4524 
4525   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4526   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4527   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4528   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4529   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4530   In this case, the values of `d_nz`,`o_nz` are
4531 .vb
4532      proc0  dnz = 2, o_nz = 2
4533      proc1  dnz = 3, o_nz = 2
4534      proc2  dnz = 1, o_nz = 4
4535 .ve
4536   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4537   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4538   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4539   34 values.
4540 
4541   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4542   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4543   In the above case the values for `d_nnz`, `o_nnz` are
4544 .vb
4545      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4546      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4547      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4548 .ve
4549   Here the space allocated is the sum of all the above values, i.e., 34, and
4550   hence pre-allocation is perfect.
4551 
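  For instance, a minimal sketch of the single call that proc0 in the example above could make (proc1 and
  proc2 pass their own local sizes and arrays, and three MPI ranks are assumed so that a `MATMPIAIJ`
  matrix is created) is
.vb
     Mat      A;
     PetscInt d_nnz[] = {2, 2, 2}, o_nnz[] = {2, 2, 2}; /* the three rows owned by proc0 */

     MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
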
4552 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4553           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4554           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4555 @*/
4556 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4557 {
4558   PetscMPIInt size;
4559 
4560   PetscFunctionBegin;
4561   PetscCall(MatCreate(comm, A));
4562   PetscCall(MatSetSizes(*A, m, n, M, N));
4563   PetscCallMPI(MPI_Comm_size(comm, &size));
4564   if (size > 1) {
4565     PetscCall(MatSetType(*A, MATMPIAIJ));
4566     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4567   } else {
4568     PetscCall(MatSetType(*A, MATSEQAIJ));
4569     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4570   }
4571   PetscFunctionReturn(PETSC_SUCCESS);
4572 }
4573 
4574 /*MC
4575     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4576 
4577     Synopsis:
4578     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4579 
4580     Not Collective
4581 
4582     Input Parameter:
4583 .   A - the `MATMPIAIJ` matrix
4584 
4585     Output Parameters:
4586 +   Ad - the diagonal portion of the matrix
4587 .   Ao - the off-diagonal portion of the matrix
4588 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4589 -   ierr - error code
4590 
4591      Level: advanced
4592 
4593     Note:
4594     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4595 
4596 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4597 M*/
4598 
4599 /*MC
4600     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4601 
4602     Synopsis:
4603     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4604 
4605     Not Collective
4606 
4607     Input Parameters:
4608 +   A - the `MATMPIAIJ` matrix
4609 .   Ad - the diagonal portion of the matrix
4610 .   Ao - the off-diagonal portion of the matrix
4611 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4612 -   ierr - error code
4613 
4614      Level: advanced
4615 
4616 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4617 M*/
4618 
4619 /*@C
4620   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4621 
4622   Not Collective
4623 
4624   Input Parameter:
4625 . A - The `MATMPIAIJ` matrix
4626 
4627   Output Parameters:
4628 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4629 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4630 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4631 
4632   Level: intermediate
4633 
4634   Note:
4635   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4636   in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4637   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4638   local column numbers to global column numbers in the original matrix.
4639 
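  A sketch of typical use, assuming `A` is an assembled `MATMPIAIJ` matrix, is
.vb
     Mat             Ad, Ao;
     const PetscInt *colmap;
     PetscInt        nco;

     MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
     MatGetLocalSize(Ao, NULL, &nco); /* Nco, the number of nonzero off-diagonal columns */
     for (PetscInt c = 0; c < nco; c++) PetscPrintf(PETSC_COMM_SELF, "local column %" PetscInt_FMT " of Ao maps to global column %" PetscInt_FMT "\n", c, colmap[c]);
.ve
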
4640   Fortran Notes:
4641   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4642 
4643 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4644 @*/
4645 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4646 {
4647   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4648   PetscBool   flg;
4649 
4650   PetscFunctionBegin;
4651   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4652   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4653   if (Ad) *Ad = a->A;
4654   if (Ao) *Ao = a->B;
4655   if (colmap) *colmap = a->garray;
4656   PetscFunctionReturn(PETSC_SUCCESS);
4657 }
4658 
4659 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4660 {
4661   PetscInt     m, N, i, rstart, nnz, Ii;
4662   PetscInt    *indx;
4663   PetscScalar *values;
4664   MatType      rootType;
4665 
4666   PetscFunctionBegin;
4667   PetscCall(MatGetSize(inmat, &m, &N));
4668   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4669     PetscInt *dnz, *onz, sum, bs, cbs;
4670 
4671     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4672     /* Check sum(n) = N */
4673     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4674     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4675 
4676     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4677     rstart -= m;
4678 
4679     MatPreallocateBegin(comm, m, n, dnz, onz);
4680     for (i = 0; i < m; i++) {
4681       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4682       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4683       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4684     }
4685 
4686     PetscCall(MatCreate(comm, outmat));
4687     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4688     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4689     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4690     PetscCall(MatGetRootType_Private(inmat, &rootType));
4691     PetscCall(MatSetType(*outmat, rootType));
4692     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4693     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4694     MatPreallocateEnd(dnz, onz);
4695     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4696   }
4697 
4698   /* numeric phase */
4699   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4700   for (i = 0; i < m; i++) {
4701     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4702     Ii = i + rstart;
4703     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4704     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4705   }
4706   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4707   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4708   PetscFunctionReturn(PETSC_SUCCESS);
4709 }
4710 
4711 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
4712 {
4713   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;
4714 
4715   PetscFunctionBegin;
4716   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4717   PetscCall(PetscFree(merge->id_r));
4718   PetscCall(PetscFree(merge->len_s));
4719   PetscCall(PetscFree(merge->len_r));
4720   PetscCall(PetscFree(merge->bi));
4721   PetscCall(PetscFree(merge->bj));
4722   PetscCall(PetscFree(merge->buf_ri[0]));
4723   PetscCall(PetscFree(merge->buf_ri));
4724   PetscCall(PetscFree(merge->buf_rj[0]));
4725   PetscCall(PetscFree(merge->buf_rj));
4726   PetscCall(PetscFree(merge->coi));
4727   PetscCall(PetscFree(merge->coj));
4728   PetscCall(PetscFree(merge->owners_co));
4729   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4730   PetscCall(PetscFree(merge));
4731   PetscFunctionReturn(PETSC_SUCCESS);
4732 }
4733 
4734 #include <../src/mat/utils/freespace.h>
4735 #include <petscbt.h>
4736 
4737 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4738 {
4739   MPI_Comm             comm;
4740   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4741   PetscMPIInt          size, rank, taga, *len_s;
4742   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4743   PetscMPIInt          proc, k;
4744   PetscInt           **buf_ri, **buf_rj;
4745   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4746   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4747   MPI_Request         *s_waits, *r_waits;
4748   MPI_Status          *status;
4749   const MatScalar     *aa, *a_a;
4750   MatScalar          **abuf_r, *ba_i;
4751   Mat_Merge_SeqsToMPI *merge;
4752   PetscContainer       container;
4753 
4754   PetscFunctionBegin;
4755   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4756   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4757 
4758   PetscCallMPI(MPI_Comm_size(comm, &size));
4759   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4760 
4761   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4762   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4763   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4764   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4765   aa = a_a;
4766 
4767   bi     = merge->bi;
4768   bj     = merge->bj;
4769   buf_ri = merge->buf_ri;
4770   buf_rj = merge->buf_rj;
4771 
4772   PetscCall(PetscMalloc1(size, &status));
4773   owners = merge->rowmap->range;
4774   len_s  = merge->len_s;
4775 
4776   /* send and recv matrix values */
4777   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4778   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4779 
4780   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4781   for (proc = 0, k = 0; proc < size; proc++) {
4782     if (!len_s[proc]) continue;
4783     i = owners[proc];
4784     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4785     k++;
4786   }
4787 
4788   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4789   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4790   PetscCall(PetscFree(status));
4791 
4792   PetscCall(PetscFree(s_waits));
4793   PetscCall(PetscFree(r_waits));
4794 
4795   /* insert mat values of mpimat */
4796   PetscCall(PetscMalloc1(N, &ba_i));
4797   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4798 
4799   for (k = 0; k < merge->nrecv; k++) {
4800     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4801     nrows       = *buf_ri_k[k];
4802     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4803     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4804   }
4805 
4806   /* set values of ba */
4807   m = merge->rowmap->n;
4808   for (i = 0; i < m; i++) {
4809     arow = owners[rank] + i;
4810     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4811     bnzi = bi[i + 1] - bi[i];
4812     PetscCall(PetscArrayzero(ba_i, bnzi));
4813 
4814     /* add local non-zero vals of this proc's seqmat into ba */
4815     anzi   = ai[arow + 1] - ai[arow];
4816     aj     = a->j + ai[arow];
4817     aa     = a_a + ai[arow];
4818     nextaj = 0;
4819     for (j = 0; nextaj < anzi; j++) {
4820       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4821         ba_i[j] += aa[nextaj++];
4822       }
4823     }
4824 
4825     /* add received vals into ba */
4826     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4827       /* i-th row */
4828       if (i == *nextrow[k]) {
4829         anzi   = *(nextai[k] + 1) - *nextai[k];
4830         aj     = buf_rj[k] + *nextai[k];
4831         aa     = abuf_r[k] + *nextai[k];
4832         nextaj = 0;
4833         for (j = 0; nextaj < anzi; j++) {
4834           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4835             ba_i[j] += aa[nextaj++];
4836           }
4837         }
4838         nextrow[k]++;
4839         nextai[k]++;
4840       }
4841     }
4842     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4843   }
4844   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4845   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4846   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4847 
4848   PetscCall(PetscFree(abuf_r[0]));
4849   PetscCall(PetscFree(abuf_r));
4850   PetscCall(PetscFree(ba_i));
4851   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4852   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4853   PetscFunctionReturn(PETSC_SUCCESS);
4854 }
4855 
4856 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4857 {
4858   Mat                  B_mpi;
4859   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4860   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4861   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4862   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4863   PetscInt             len, *dnz, *onz, bs, cbs;
4864   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4865   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4866   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4867   MPI_Status          *status;
4868   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4869   PetscBT              lnkbt;
4870   Mat_Merge_SeqsToMPI *merge;
4871   PetscContainer       container;
4872 
4873   PetscFunctionBegin;
4874   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4875 
4876   /* make sure it is a PETSc comm */
4877   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4878   PetscCallMPI(MPI_Comm_size(comm, &size));
4879   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4880 
4881   PetscCall(PetscNew(&merge));
4882   PetscCall(PetscMalloc1(size, &status));
4883 
4884   /* determine row ownership */
4885   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4886   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4887   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4888   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4889   PetscCall(PetscLayoutSetUp(merge->rowmap));
4890   PetscCall(PetscMalloc1(size, &len_si));
4891   PetscCall(PetscMalloc1(size, &merge->len_s));
4892 
4893   m      = merge->rowmap->n;
4894   owners = merge->rowmap->range;
4895 
4896   /* determine the number of messages to send, their lengths */
4897   len_s = merge->len_s;
4898 
4899   len          = 0; /* length of buf_si[] */
4900   merge->nsend = 0;
4901   for (PetscMPIInt proc = 0; proc < size; proc++) {
4902     len_si[proc] = 0;
4903     if (proc == rank) {
4904       len_s[proc] = 0;
4905     } else {
4906       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4907       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4908     }
4909     if (len_s[proc]) {
4910       merge->nsend++;
4911       nrows = 0;
4912       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4913         if (ai[i + 1] > ai[i]) nrows++;
4914       }
4915       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4916       len += len_si[proc];
4917     }
4918   }
4919 
4920   /* determine the number and length of messages to receive for ij-structure */
4921   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4922   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4923 
4924   /* post the Irecv of j-structure */
4925   PetscCall(PetscCommGetNewTag(comm, &tagj));
4926   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4927 
4928   /* post the Isend of j-structure */
4929   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4930 
4931   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4932     if (!len_s[proc]) continue;
4933     i = owners[proc];
4934     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4935     k++;
4936   }
4937 
4938   /* receives and sends of j-structure are complete */
4939   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4940   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4941 
4942   /* send and recv i-structure */
4943   PetscCall(PetscCommGetNewTag(comm, &tagi));
4944   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4945 
4946   PetscCall(PetscMalloc1(len + 1, &buf_s));
4947   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4948   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4949     if (!len_s[proc]) continue;
4950     /* form outgoing message for i-structure:
4951          buf_si[0]:                 nrows to be sent
4952                [1:nrows]:           row index (global)
4953                [nrows+1:2*nrows+1]: i-structure index
4954     */
4955     nrows       = len_si[proc] / 2 - 1;
4956     buf_si_i    = buf_si + nrows + 1;
4957     buf_si[0]   = nrows;
4958     buf_si_i[0] = 0;
4959     nrows       = 0;
4960     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4961       anzi = ai[i + 1] - ai[i];
4962       if (anzi) {
4963         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4964         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4965         nrows++;
4966       }
4967     }
4968     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4969     k++;
4970     buf_si += len_si[proc];
4971   }
4972 
4973   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4974   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4975 
4976   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4977   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4978 
4979   PetscCall(PetscFree(len_si));
4980   PetscCall(PetscFree(len_ri));
4981   PetscCall(PetscFree(rj_waits));
4982   PetscCall(PetscFree2(si_waits, sj_waits));
4983   PetscCall(PetscFree(ri_waits));
4984   PetscCall(PetscFree(buf_s));
4985   PetscCall(PetscFree(status));
4986 
4987   /* compute a local seq matrix in each processor */
4988   /* allocate bi array and free space for accumulating nonzero column info */
4989   PetscCall(PetscMalloc1(m + 1, &bi));
4990   bi[0] = 0;
4991 
4992   /* create and initialize a linked list */
4993   nlnk = N + 1;
4994   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4995 
4996   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4997   len = ai[owners[rank + 1]] - ai[owners[rank]];
4998   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4999 
5000   current_space = free_space;
5001 
5002   /* determine symbolic info for each local row */
5003   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
5004 
5005   for (k = 0; k < merge->nrecv; k++) {
5006     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5007     nrows       = *buf_ri_k[k];
5008     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5009     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5010   }
5011 
5012   MatPreallocateBegin(comm, m, n, dnz, onz);
5013   len = 0;
5014   for (i = 0; i < m; i++) {
5015     bnzi = 0;
5016     /* add local non-zero cols of this proc's seqmat into lnk */
5017     arow = owners[rank] + i;
5018     anzi = ai[arow + 1] - ai[arow];
5019     aj   = a->j + ai[arow];
5020     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5021     bnzi += nlnk;
5022     /* add received col data into lnk */
5023     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5024       if (i == *nextrow[k]) {            /* i-th row */
5025         anzi = *(nextai[k] + 1) - *nextai[k];
5026         aj   = buf_rj[k] + *nextai[k];
5027         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5028         bnzi += nlnk;
5029         nextrow[k]++;
5030         nextai[k]++;
5031       }
5032     }
5033     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5034 
5035     /* if free space is not available, make more free space */
5036     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5037     /* copy data into free space, then initialize lnk */
5038     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5039     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5040 
5041     current_space->array += bnzi;
5042     current_space->local_used += bnzi;
5043     current_space->local_remaining -= bnzi;
5044 
5045     bi[i + 1] = bi[i] + bnzi;
5046   }
5047 
5048   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5049 
5050   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5051   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5052   PetscCall(PetscLLDestroy(lnk, lnkbt));
5053 
5054   /* create symbolic parallel matrix B_mpi */
5055   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5056   PetscCall(MatCreate(comm, &B_mpi));
5057   if (n == PETSC_DECIDE) {
5058     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5059   } else {
5060     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5061   }
5062   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5063   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5064   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5065   MatPreallocateEnd(dnz, onz);
5066   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5067 
5068   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5069   B_mpi->assembled = PETSC_FALSE;
5070   merge->bi        = bi;
5071   merge->bj        = bj;
5072   merge->buf_ri    = buf_ri;
5073   merge->buf_rj    = buf_rj;
5074   merge->coi       = NULL;
5075   merge->coj       = NULL;
5076   merge->owners_co = NULL;
5077 
5078   PetscCall(PetscCommDestroy(&comm));
5079 
5080   /* attach the supporting struct to B_mpi for reuse */
5081   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5082   PetscCall(PetscContainerSetPointer(container, merge));
5083   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5084   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5085   PetscCall(PetscContainerDestroy(&container));
5086   *mpimat = B_mpi;
5087 
5088   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5089   PetscFunctionReturn(PETSC_SUCCESS);
5090 }
5091 
5092 /*@
5093   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5094   matrices from each processor
5095 
5096   Collective
5097 
5098   Input Parameters:
5099 + comm   - the communicator the parallel matrix will live on
5100 . seqmat - the input sequential matrix on each MPI process
5101 . m      - number of local rows (or `PETSC_DECIDE`)
5102 . n      - number of local columns (or `PETSC_DECIDE`)
5103 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5104 
5105   Output Parameter:
5106 . mpimat - the parallel matrix generated
5107 
5108   Level: advanced
5109 
5110   Note:
5111   The dimensions of the sequential matrix on each MPI process MUST be the same.
5112   The input `seqmat` is stored in the container "Mat_Merge_SeqsToMPI", and will be
5113   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
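5114 
  Example Usage:
  A minimal sketch (names are illustrative); each MPI process passes its own assembled `MATSEQAIJ` matrix `seqmat`,
  whose numerical values may change (with an unchanged nonzero pattern) between the two calls below.
.vb
  Mat C;

  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&C));
.ve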
5114 
5115 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5116 @*/
5117 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5118 {
5119   PetscMPIInt size;
5120 
5121   PetscFunctionBegin;
5122   PetscCallMPI(MPI_Comm_size(comm, &size));
5123   if (size == 1) {
5124     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5125     if (scall == MAT_INITIAL_MATRIX) {
5126       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5127     } else {
5128       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5129     }
5130     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5131     PetscFunctionReturn(PETSC_SUCCESS);
5132   }
5133   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5134   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5135   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5136   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5137   PetscFunctionReturn(PETSC_SUCCESS);
5138 }
5139 
5140 /*@
5141   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5142 
5143   Not Collective
5144 
5145   Input Parameter:
5146 . A - the matrix
5147 
5148   Output Parameter:
5149 . A_loc - the local sequential matrix generated
5150 
5151   Level: developer
5152 
5153   Notes:
5154   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5155   with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
5156   `n` is the global column count obtained with `MatGetSize()`.
5157 
5158   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5159 
5160   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5161 
5162   Destroy the matrix with `MatDestroy()`
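5163 
  Example Usage:
  A minimal sketch (names are illustrative); `A` is an assembled `MATAIJ` matrix.
.vb
  Mat A_loc;

  PetscCall(MatAIJGetLocalMat(A, &A_loc));
  PetscCall(MatDestroy(&A_loc));
.ve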
5163 
5164 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5165 @*/
5166 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5167 {
5168   PetscBool mpi;
5169 
5170   PetscFunctionBegin;
5171   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5172   if (mpi) {
5173     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5174   } else {
5175     *A_loc = A;
5176     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5177   }
5178   PetscFunctionReturn(PETSC_SUCCESS);
5179 }
5180 
5181 /*@
5182   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5183 
5184   Not Collective
5185 
5186   Input Parameters:
5187 + A     - the matrix
5188 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5189 
5190   Output Parameter:
5191 . A_loc - the local sequential matrix generated
5192 
5193   Level: developer
5194 
5195   Notes:
5196   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5197   matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5198   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5199 
5200   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5201 
5202   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5203   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5204   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5205   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
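5206 
  Example Usage:
  A minimal sketch (names are illustrative); `A` is an assembled `MATMPIAIJ` matrix whose values may change,
  with an unchanged nonzero pattern, between the two calls below.
.vb
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));
  PetscCall(MatDestroy(&A_loc));
.ve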
5206 
5207 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5208 @*/
5209 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5210 {
5211   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5212   Mat_SeqAIJ        *mat, *a, *b;
5213   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5214   const PetscScalar *aa, *ba, *aav, *bav;
5215   PetscScalar       *ca, *cam;
5216   PetscMPIInt        size;
5217   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5218   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5219   PetscBool          match;
5220 
5221   PetscFunctionBegin;
5222   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5223   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5224   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5225   if (size == 1) {
5226     if (scall == MAT_INITIAL_MATRIX) {
5227       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5228       *A_loc = mpimat->A;
5229     } else if (scall == MAT_REUSE_MATRIX) {
5230       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5231     }
5232     PetscFunctionReturn(PETSC_SUCCESS);
5233   }
5234 
5235   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5236   a  = (Mat_SeqAIJ *)mpimat->A->data;
5237   b  = (Mat_SeqAIJ *)mpimat->B->data;
5238   ai = a->i;
5239   aj = a->j;
5240   bi = b->i;
5241   bj = b->j;
5242   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5243   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5244   aa = aav;
5245   ba = bav;
5246   if (scall == MAT_INITIAL_MATRIX) {
5247     PetscCall(PetscMalloc1(1 + am, &ci));
5248     ci[0] = 0;
5249     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5250     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5251     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5252     k = 0;
5253     for (i = 0; i < am; i++) {
5254       ncols_o = bi[i + 1] - bi[i];
5255       ncols_d = ai[i + 1] - ai[i];
5256       /* off-diagonal portion of A */
5257       for (jo = 0; jo < ncols_o; jo++) {
5258         col = cmap[*bj];
5259         if (col >= cstart) break;
5260         cj[k] = col;
5261         bj++;
5262         ca[k++] = *ba++;
5263       }
5264       /* diagonal portion of A */
5265       for (j = 0; j < ncols_d; j++) {
5266         cj[k]   = cstart + *aj++;
5267         ca[k++] = *aa++;
5268       }
5269       /* off-diagonal portion of A */
5270       for (j = jo; j < ncols_o; j++) {
5271         cj[k]   = cmap[*bj++];
5272         ca[k++] = *ba++;
5273       }
5274     }
5275     /* put together the new matrix */
5276     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5277     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5278     /* Since these are PETSc arrays, change flags to free them as necessary. */
5279     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5280     mat->free_a  = PETSC_TRUE;
5281     mat->free_ij = PETSC_TRUE;
5282     mat->nonew   = 0;
5283   } else if (scall == MAT_REUSE_MATRIX) {
5284     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5285     ci  = mat->i;
5286     cj  = mat->j;
5287     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5288     for (i = 0; i < am; i++) {
5289       /* off-diagonal portion of A */
5290       ncols_o = bi[i + 1] - bi[i];
5291       for (jo = 0; jo < ncols_o; jo++) {
5292         col = cmap[*bj];
5293         if (col >= cstart) break;
5294         *cam++ = *ba++;
5295         bj++;
5296       }
5297       /* diagonal portion of A */
5298       ncols_d = ai[i + 1] - ai[i];
5299       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5300       /* off-diagonal portion of A */
5301       for (j = jo; j < ncols_o; j++) {
5302         *cam++ = *ba++;
5303         bj++;
5304       }
5305     }
5306     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5307   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5308   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5309   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5310   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5311   PetscFunctionReturn(PETSC_SUCCESS);
5312 }
5313 
5314 /*@
5315   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5316   mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5317 
5318   Not Collective
5319 
5320   Input Parameters:
5321 + A     - the matrix
5322 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5323 
5324   Output Parameters:
5325 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5326 - A_loc - the local sequential matrix generated
5327 
5328   Level: developer
5329 
5330   Note:
5331   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5332   part, followed by those associated with the off-diagonal part (in its local ordering)
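5333 
  Example Usage:
  A minimal sketch (names are illustrative); `A` is an assembled `MATMPIAIJ` matrix, and `glob` maps the local
  column numbering of `A_loc` back to global column indices of `A`.
.vb
  Mat A_loc;
  IS  glob;

  PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc));
  PetscCall(ISDestroy(&glob));
  PetscCall(MatDestroy(&A_loc));
.ve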
5333 
5334 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5335 @*/
5336 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5337 {
5338   Mat             Ao, Ad;
5339   const PetscInt *cmap;
5340   PetscMPIInt     size;
5341   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5342 
5343   PetscFunctionBegin;
5344   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5345   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5346   if (size == 1) {
5347     if (scall == MAT_INITIAL_MATRIX) {
5348       PetscCall(PetscObjectReference((PetscObject)Ad));
5349       *A_loc = Ad;
5350     } else if (scall == MAT_REUSE_MATRIX) {
5351       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5352     }
5353     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5354     PetscFunctionReturn(PETSC_SUCCESS);
5355   }
5356   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5357   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5358   if (f) {
5359     PetscCall((*f)(A, scall, glob, A_loc));
5360   } else {
5361     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5362     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5363     Mat_SeqAIJ        *c;
5364     PetscInt          *ai = a->i, *aj = a->j;
5365     PetscInt          *bi = b->i, *bj = b->j;
5366     PetscInt          *ci, *cj;
5367     const PetscScalar *aa, *ba;
5368     PetscScalar       *ca;
5369     PetscInt           i, j, am, dn, on;
5370 
5371     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5372     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5373     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5374     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5375     if (scall == MAT_INITIAL_MATRIX) {
5376       PetscInt k;
5377       PetscCall(PetscMalloc1(1 + am, &ci));
5378       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5379       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5380       ci[0] = 0;
5381       for (i = 0, k = 0; i < am; i++) {
5382         const PetscInt ncols_o = bi[i + 1] - bi[i];
5383         const PetscInt ncols_d = ai[i + 1] - ai[i];
5384         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5385         /* diagonal portion of A */
5386         for (j = 0; j < ncols_d; j++, k++) {
5387           cj[k] = *aj++;
5388           ca[k] = *aa++;
5389         }
5390         /* off-diagonal portion of A */
5391         for (j = 0; j < ncols_o; j++, k++) {
5392           cj[k] = dn + *bj++;
5393           ca[k] = *ba++;
5394         }
5395       }
5396       /* put together the new matrix */
5397       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5398       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5399       /* Since these are PETSc arrays, change flags to free them as necessary. */
5400       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5401       c->free_a  = PETSC_TRUE;
5402       c->free_ij = PETSC_TRUE;
5403       c->nonew   = 0;
5404       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5405     } else if (scall == MAT_REUSE_MATRIX) {
5406       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5407       for (i = 0; i < am; i++) {
5408         const PetscInt ncols_d = ai[i + 1] - ai[i];
5409         const PetscInt ncols_o = bi[i + 1] - bi[i];
5410         /* diagonal portion of A */
5411         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5412         /* off-diagonal portion of A */
5413         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5414       }
5415       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5416     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5417     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5418     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5419     if (glob) {
5420       PetscInt cst, *gidx;
5421 
5422       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5423       PetscCall(PetscMalloc1(dn + on, &gidx));
5424       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5425       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5426       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5427     }
5428   }
5429   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5430   PetscFunctionReturn(PETSC_SUCCESS);
5431 }
5432 
5433 /*@C
5434   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5435 
5436   Not Collective
5437 
5438   Input Parameters:
5439 + A     - the matrix
5440 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5441 . row   - index set of rows to extract (or `NULL`)
5442 - col   - index set of columns to extract (or `NULL`)
5443 
5444   Output Parameter:
5445 . A_loc - the local sequential matrix generated
5446 
5447   Level: developer
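5448 
  Example Usage:
  A minimal sketch (names are illustrative); passing `NULL` for `row` and `col` selects all local rows and all
  nonzero columns of the assembled `MATMPIAIJ` matrix `A`.
.vb
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc));
  PetscCall(MatDestroy(&A_loc));
.ve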
5448 
5449 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5450 @*/
5451 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5452 {
5453   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5454   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5455   IS          isrowa, iscola;
5456   Mat        *aloc;
5457   PetscBool   match;
5458 
5459   PetscFunctionBegin;
5460   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5461   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5462   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5463   if (!row) {
5464     start = A->rmap->rstart;
5465     end   = A->rmap->rend;
5466     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5467   } else {
5468     isrowa = *row;
5469   }
5470   if (!col) {
5471     start = A->cmap->rstart;
5472     cmap  = a->garray;
5473     nzA   = a->A->cmap->n;
5474     nzB   = a->B->cmap->n;
5475     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5476     ncols = 0;
5477     for (i = 0; i < nzB; i++) {
5478       if (cmap[i] < start) idx[ncols++] = cmap[i];
5479       else break;
5480     }
5481     imark = i;
5482     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5483     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5484     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5485   } else {
5486     iscola = *col;
5487   }
5488   if (scall != MAT_INITIAL_MATRIX) {
5489     PetscCall(PetscMalloc1(1, &aloc));
5490     aloc[0] = *A_loc;
5491   }
5492   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5493   if (!col) { /* attach global id of condensed columns */
5494     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5495   }
5496   *A_loc = aloc[0];
5497   PetscCall(PetscFree(aloc));
5498   if (!row) PetscCall(ISDestroy(&isrowa));
5499   if (!col) PetscCall(ISDestroy(&iscola));
5500   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5501   PetscFunctionReturn(PETSC_SUCCESS);
5502 }
5503 
5504 /*
5505  * Create a sequential AIJ matrix based on row indices; the whole row (all of its columns) is extracted once a row is matched.
5506  * Rows can be local or remote. The routine is designed to be scalable in memory, so that nothing is based
5507  * on a global size.
5508  * */
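/*
 * The implementation below relies on PetscSF: the local rows of P are the roots and the requested rows are the
 * leaves. A minimal sketch of that generic root-to-leaf broadcast pattern (names are illustrative and not part
 * of this routine):
 *
 *   PetscSF sf;
 *   PetscCall(PetscSFCreate(comm, &sf));
 *   PetscCall(PetscSFSetGraph(sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
 *   PetscCall(PetscSFSetUp(sf));
 *   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
 *   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
 *   PetscCall(PetscSFDestroy(&sf));
 */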
5509 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5510 {
5511   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5512   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5513   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5514   PetscMPIInt            owner;
5515   PetscSFNode           *iremote, *oiremote;
5516   const PetscInt        *lrowindices;
5517   PetscSF                sf, osf;
5518   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5519   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5520   MPI_Comm               comm;
5521   ISLocalToGlobalMapping mapping;
5522   const PetscScalar     *pd_a, *po_a;
5523 
5524   PetscFunctionBegin;
5525   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5526   /* plocalsize is the number of roots
5527    * nrows is the number of leaves
5528    * */
5529   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5530   PetscCall(ISGetLocalSize(rows, &nrows));
5531   PetscCall(PetscCalloc1(nrows, &iremote));
5532   PetscCall(ISGetIndices(rows, &lrowindices));
5533   for (i = 0; i < nrows; i++) {
5534     /* Find a remote index and an owner for a row
5535      * The row could be local or remote
5536      * */
5537     owner = 0;
5538     lidx  = 0;
5539     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5540     iremote[i].index = lidx;
5541     iremote[i].rank  = owner;
5542   }
5543   /* Create SF to communicate how many nonzero columns for each row */
5544   PetscCall(PetscSFCreate(comm, &sf));
5545   /* SF will figure out the number of nonzero columns for each row, and their
5546    * offsets
5547    * */
5548   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5549   PetscCall(PetscSFSetFromOptions(sf));
5550   PetscCall(PetscSFSetUp(sf));
5551 
5552   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5553   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5554   PetscCall(PetscCalloc1(nrows, &pnnz));
5555   roffsets[0] = 0;
5556   roffsets[1] = 0;
5557   for (i = 0; i < plocalsize; i++) {
5558     /* diagonal */
5559     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5560     /* off-diagonal */
5561     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5562     /* compute offsets so that we know the relative location of each row */
5563     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5564     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5565   }
5566   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5567   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5568   /* 'r' means root, and 'l' means leaf */
5569   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5570   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5571   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5572   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5573   PetscCall(PetscSFDestroy(&sf));
5574   PetscCall(PetscFree(roffsets));
5575   PetscCall(PetscFree(nrcols));
5576   dntotalcols = 0;
5577   ontotalcols = 0;
5578   ncol        = 0;
5579   for (i = 0; i < nrows; i++) {
5580     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5581     ncol    = PetscMax(pnnz[i], ncol);
5582     /* diagonal */
5583     dntotalcols += nlcols[i * 2 + 0];
5584     /* off-diagonal */
5585     ontotalcols += nlcols[i * 2 + 1];
5586   }
5587   /* We do not need to figure out the right number of columns
5588    * since all the calculations will be done by going through the raw data
5589    * */
5590   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5591   PetscCall(MatSetUp(*P_oth));
5592   PetscCall(PetscFree(pnnz));
5593   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5594   /* diagonal */
5595   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5596   /* off-diagonal */
5597   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5598   /* diagonal */
5599   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5600   /* off-diagonal */
5601   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5602   dntotalcols = 0;
5603   ontotalcols = 0;
5604   ntotalcols  = 0;
5605   for (i = 0; i < nrows; i++) {
5606     owner = 0;
5607     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5608     /* Set iremote for diag matrix */
5609     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5610       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5611       iremote[dntotalcols].rank  = owner;
5612       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5613       ilocal[dntotalcols++] = ntotalcols++;
5614     }
5615     /* off-diagonal */
5616     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5617       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5618       oiremote[ontotalcols].rank  = owner;
5619       oilocal[ontotalcols++]      = ntotalcols++;
5620     }
5621   }
5622   PetscCall(ISRestoreIndices(rows, &lrowindices));
5623   PetscCall(PetscFree(loffsets));
5624   PetscCall(PetscFree(nlcols));
5625   PetscCall(PetscSFCreate(comm, &sf));
5626   /* P serves as roots and P_oth is leaves
5627    * Diag matrix
5628    * */
5629   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5630   PetscCall(PetscSFSetFromOptions(sf));
5631   PetscCall(PetscSFSetUp(sf));
5632 
5633   PetscCall(PetscSFCreate(comm, &osf));
5634   /* off-diagonal */
5635   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5636   PetscCall(PetscSFSetFromOptions(osf));
5637   PetscCall(PetscSFSetUp(osf));
5638   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5639   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5640   /* operate on the matrix internal data to save memory */
5641   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5642   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5643   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5644   /* Convert to global indices for diag matrix */
5645   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5646   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5647   /* We want P_oth to store global indices */
5648   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5649   /* Use memory scalable approach */
5650   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5651   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5652   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5653   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5654   /* Convert back to local indices */
5655   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5656   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5657   nout = 0;
5658   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5659   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5660   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5661   /* Exchange values */
5662   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5663   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5664   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5665   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5666   /* Stop PETSc from shrinking memory */
5667   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5668   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5669   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5670   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5671   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5672   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5673   PetscCall(PetscSFDestroy(&sf));
5674   PetscCall(PetscSFDestroy(&osf));
5675   PetscFunctionReturn(PETSC_SUCCESS);
5676 }
5677 
5678 /*
5679  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A.
5680  * This supports MPIAIJ and MAIJ.
5681  * */
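/*
 * A typical call sequence, as a sketch (names are illustrative; dof = 1 corresponds to plain MPIAIJ):
 *
 *   Mat P_oth = NULL;
 *   PetscCall(MatGetBrowsOfAcols_MPIXAIJ(A, P, 1, MAT_INITIAL_MATRIX, &P_oth));
 *   ... P's values change while its nonzero pattern stays the same ...
 *   PetscCall(MatGetBrowsOfAcols_MPIXAIJ(A, P, 1, MAT_REUSE_MATRIX, &P_oth));
 *   PetscCall(MatDestroy(&P_oth));
 */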
5682 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5683 {
5684   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5685   Mat_SeqAIJ *p_oth;
5686   IS          rows, map;
5687   PetscHMapI  hamp;
5688   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5689   MPI_Comm    comm;
5690   PetscSF     sf, osf;
5691   PetscBool   has;
5692 
5693   PetscFunctionBegin;
5694   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5695   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5696   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5697    *  and then create a submatrix (that often is an overlapping matrix)
5698    * */
5699   if (reuse == MAT_INITIAL_MATRIX) {
5700     /* Use a hash table to figure out unique keys */
5701     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5702     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5703     count = 0;
5704     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5705     for (i = 0; i < a->B->cmap->n; i++) {
5706       key = a->garray[i] / dof;
5707       PetscCall(PetscHMapIHas(hamp, key, &has));
5708       if (!has) {
5709         mapping[i] = count;
5710         PetscCall(PetscHMapISet(hamp, key, count++));
5711       } else {
5712         /* Current 'i' has the same key as the previous step */
5713         mapping[i] = count - 1;
5714       }
5715     }
5716     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5717     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5718     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5719     PetscCall(PetscCalloc1(htsize, &rowindices));
5720     off = 0;
5721     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5722     PetscCall(PetscHMapIDestroy(&hamp));
5723     PetscCall(PetscSortInt(htsize, rowindices));
5724     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5725     /* In case the matrix was already created and the user wants to recreate it */
5726     PetscCall(MatDestroy(P_oth));
5727     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5728     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5729     PetscCall(ISDestroy(&map));
5730     PetscCall(ISDestroy(&rows));
5731   } else if (reuse == MAT_REUSE_MATRIX) {
5732     /* If the matrix was already created, we simply update its values using the SF objects
5733      * that were attached to the matrix earlier.
5734      */
5735     const PetscScalar *pd_a, *po_a;
5736 
5737     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5738     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5739     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5740     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5741     /* Update values in place */
5742     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5743     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5744     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5745     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5746     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5747     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5748     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5749     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5750   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5751   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5752   PetscFunctionReturn(PETSC_SUCCESS);
5753 }
5754 
5755 /*@C
5756   MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that correspond to the nonzero columns of local `A`
5757 
5758   Collective
5759 
5760   Input Parameters:
5761 + A     - the first matrix in `MATMPIAIJ` format
5762 . B     - the second matrix in `MATMPIAIJ` format
5763 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5764 
5765   Output Parameters:
5766 + rowb  - On input, the index set of rows of B to extract (or `NULL`); modified on output
5767 . colb  - On input, the index set of columns of B to extract (or `NULL`); modified on output
5768 - B_seq - the sequential matrix generated
5769 
5770   Level: developer
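5771 
  Example Usage:
  A minimal sketch (names are illustrative); `A` and `B` are assembled `MATMPIAIJ` matrices with compatible layouts,
  and `B`'s values may change, with an unchanged nonzero pattern, between the two calls below.
.vb
  IS  rowb = NULL, colb = NULL;
  Mat B_seq = NULL;

  PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq));
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq));
  PetscCall(ISDestroy(&rowb));
  PetscCall(ISDestroy(&colb));
  PetscCall(MatDestroy(&B_seq));
.ve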
5771 
5772 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5773 @*/
5774 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5775 {
5776   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5777   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5778   IS          isrowb, iscolb;
5779   Mat        *bseq = NULL;
5780 
5781   PetscFunctionBegin;
5782   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5783              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5784   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5785 
5786   if (scall == MAT_INITIAL_MATRIX) {
5787     start = A->cmap->rstart;
5788     cmap  = a->garray;
5789     nzA   = a->A->cmap->n;
5790     nzB   = a->B->cmap->n;
5791     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5792     ncols = 0;
5793     for (i = 0; i < nzB; i++) { /* row < local row index */
5794       if (cmap[i] < start) idx[ncols++] = cmap[i];
5795       else break;
5796     }
5797     imark = i;
5798     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5799     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5800     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5801     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5802   } else {
5803     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5804     isrowb = *rowb;
5805     iscolb = *colb;
5806     PetscCall(PetscMalloc1(1, &bseq));
5807     bseq[0] = *B_seq;
5808   }
5809   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5810   *B_seq = bseq[0];
5811   PetscCall(PetscFree(bseq));
5812   if (!rowb) {
5813     PetscCall(ISDestroy(&isrowb));
5814   } else {
5815     *rowb = isrowb;
5816   }
5817   if (!colb) {
5818     PetscCall(ISDestroy(&iscolb));
5819   } else {
5820     *colb = iscolb;
5821   }
5822   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5823   PetscFunctionReturn(PETSC_SUCCESS);
5824 }
5825 
5826 /*
5827     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
5828     of the OFF-DIAGONAL portion of local A
5829 
5830     Collective
5831 
5832    Input Parameters:
5833 +    A,B - the matrices in `MATMPIAIJ` format
5834 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5835 
5836    Output Parameters:
5837 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5838 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5839 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5840 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5841 
5842     Developer Note:
5843     This directly accesses information inside the VecScatter associated with the matrix-vector product
5844     for this matrix. This is not desirable.
5845 
5846     Level: developer
5847 
5848 */
5849 
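/*
   A typical call sequence, as a sketch (names are illustrative):

     PetscInt  *startsj_s = NULL, *startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat        B_oth = NULL;

     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
     ... B's values change while its nonzero pattern stays the same ...
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
     PetscCall(PetscFree2(startsj_s, startsj_r));
     PetscCall(PetscFree(bufa));
     PetscCall(MatDestroy(&B_oth));
*/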
5850 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5851 {
5852   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5853   VecScatter         ctx;
5854   MPI_Comm           comm;
5855   const PetscMPIInt *rprocs, *sprocs;
5856   PetscMPIInt        nrecvs, nsends;
5857   const PetscInt    *srow, *rstarts, *sstarts;
5858   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5859   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5860   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5861   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5862   PetscMPIInt        size, tag, rank, nreqs;
5863 
5864   PetscFunctionBegin;
5865   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5866   PetscCallMPI(MPI_Comm_size(comm, &size));
5867 
5868   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5869              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5870   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5871   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5872 
5873   if (size == 1) {
5874     startsj_s = NULL;
5875     bufa_ptr  = NULL;
5876     *B_oth    = NULL;
5877     PetscFunctionReturn(PETSC_SUCCESS);
5878   }
5879 
5880   ctx = a->Mvctx;
5881   tag = ((PetscObject)ctx)->tag;
5882 
5883   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5884   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5885   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5886   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5887   PetscCall(PetscMalloc1(nreqs, &reqs));
5888   rwaits = reqs;
5889   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5890 
5891   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5892   if (scall == MAT_INITIAL_MATRIX) {
5893     /* i-array */
5894     /*  post receives */
5895     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5896     for (i = 0; i < nrecvs; i++) {
5897       rowlen = rvalues + rstarts[i] * rbs;
5898       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5899       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5900     }
5901 
5902     /* pack the outgoing message */
5903     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5904 
5905     sstartsj[0] = 0;
5906     rstartsj[0] = 0;
5907     len         = 0; /* total length of j or a array to be sent */
5908     if (nsends) {
5909       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5910       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5911     }
5912     for (i = 0; i < nsends; i++) {
5913       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5914       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5915       for (j = 0; j < nrows; j++) {
5916         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5917         for (l = 0; l < sbs; l++) {
5918           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5919 
5920           rowlen[j * sbs + l] = ncols;
5921 
5922           len += ncols;
5923           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5924         }
5925         k++;
5926       }
5927       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5928 
5929       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5930     }
5931     /* recvs and sends of i-array are completed */
5932     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5933     PetscCall(PetscFree(svalues));
5934 
5935     /* allocate buffers for sending j and a arrays */
5936     PetscCall(PetscMalloc1(len + 1, &bufj));
5937     PetscCall(PetscMalloc1(len + 1, &bufa));
5938 
5939     /* create i-array of B_oth */
5940     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5941 
5942     b_othi[0] = 0;
5943     len       = 0; /* total length of j or a array to be received */
5944     k         = 0;
5945     for (i = 0; i < nrecvs; i++) {
5946       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5947       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5948       for (j = 0; j < nrows; j++) {
5949         b_othi[k + 1] = b_othi[k] + rowlen[j];
5950         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5951         k++;
5952       }
5953       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5954     }
5955     PetscCall(PetscFree(rvalues));
5956 
5957     /* allocate space for j and a arrays of B_oth */
5958     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5959     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5960 
5961     /* j-array */
5962     /*  post receives of j-array */
5963     for (i = 0; i < nrecvs; i++) {
5964       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5965       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5966     }
5967 
5968     /* pack the outgoing message j-array */
5969     if (nsends) k = sstarts[0];
5970     for (i = 0; i < nsends; i++) {
5971       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5972       bufJ  = bufj + sstartsj[i];
5973       for (j = 0; j < nrows; j++) {
5974         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5975         for (ll = 0; ll < sbs; ll++) {
5976           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5977           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5978           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5979         }
5980       }
5981       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5982     }
5983 
5984     /* recvs and sends of j-array are completed */
5985     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5986   } else if (scall == MAT_REUSE_MATRIX) {
5987     sstartsj = *startsj_s;
5988     rstartsj = *startsj_r;
5989     bufa     = *bufa_ptr;
5990     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5991   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5992 
5993   /* a-array */
5994   /*  post receives of a-array */
5995   for (i = 0; i < nrecvs; i++) {
5996     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5997     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5998   }
5999 
6000   /* pack the outgoing message a-array */
6001   if (nsends) k = sstarts[0];
6002   for (i = 0; i < nsends; i++) {
6003     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
6004     bufA  = bufa + sstartsj[i];
6005     for (j = 0; j < nrows; j++) {
6006       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6007       for (ll = 0; ll < sbs; ll++) {
6008         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6009         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6010         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6011       }
6012     }
6013     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6014   }
6015   /* recvs and sends of a-array are completed */
6016   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6017   PetscCall(PetscFree(reqs));
6018 
6019   if (scall == MAT_INITIAL_MATRIX) {
6020     Mat_SeqAIJ *b_oth;
6021 
6022     /* put together the new matrix */
6023     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6024 
6025     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6026     /* Since these are PETSc arrays, change flags to free them as necessary. */
6027     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6028     b_oth->free_a  = PETSC_TRUE;
6029     b_oth->free_ij = PETSC_TRUE;
6030     b_oth->nonew   = 0;
6031 
6032     PetscCall(PetscFree(bufj));
6033     if (!startsj_s || !bufa_ptr) {
6034       PetscCall(PetscFree2(sstartsj, rstartsj));
6035       PetscCall(PetscFree(bufa));
6036     } else {
6037       *startsj_s = sstartsj;
6038       *startsj_r = rstartsj;
6039       *bufa_ptr  = bufa;
6040     }
6041   } else if (scall == MAT_REUSE_MATRIX) {
6042     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6043   }
6044 
6045   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6046   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6047   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6048   PetscFunctionReturn(PETSC_SUCCESS);
6049 }
6050 
6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6054 #if defined(PETSC_HAVE_MKL_SPARSE)
6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6056 #endif
6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6059 #if defined(PETSC_HAVE_ELEMENTAL)
6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6061 #endif
6062 #if defined(PETSC_HAVE_SCALAPACK)
6063 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6064 #endif
6065 #if defined(PETSC_HAVE_HYPRE)
6066 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6067 #endif
6068 #if defined(PETSC_HAVE_CUDA)
6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6070 #endif
6071 #if defined(PETSC_HAVE_HIP)
6072 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6073 #endif
6074 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6075 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6076 #endif
6077 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6078 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6079 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6080 
6081 /*
6082     Computes (B'*A')' since computing B*A directly is untenable
6083 
6084                n                       p                          p
6085         [             ]       [             ]         [                 ]
6086       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6087         [             ]       [             ]         [                 ]
6088 
6089 */
6090 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6091 {
6092   Mat At, Bt, Ct;
6093 
6094   PetscFunctionBegin;
6095   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6096   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6097   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6098   PetscCall(MatDestroy(&At));
6099   PetscCall(MatDestroy(&Bt));
6100   PetscCall(MatTransposeSetPrecursor(Ct, C));
6101   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6102   PetscCall(MatDestroy(&Ct));
6103   PetscFunctionReturn(PETSC_SUCCESS);
6104 }
6105 
6106 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6107 {
6108   PetscBool cisdense;
6109 
6110   PetscFunctionBegin;
6111   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6112   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6113   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6114   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6115   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6116   PetscCall(MatSetUp(C));
6117 
6118   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6119   PetscFunctionReturn(PETSC_SUCCESS);
6120 }
6121 
6122 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6123 {
6124   Mat_Product *product = C->product;
6125   Mat          A = product->A, B = product->B;
6126 
6127   PetscFunctionBegin;
6128   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6129              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6130   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6131   C->ops->productsymbolic = MatProductSymbolic_AB;
6132   PetscFunctionReturn(PETSC_SUCCESS);
6133 }
6134 
6135 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6136 {
6137   Mat_Product *product = C->product;
6138 
6139   PetscFunctionBegin;
6140   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6141   PetscFunctionReturn(PETSC_SUCCESS);
6142 }
6143 
6144 /*
6145    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6146 
6147   Input Parameters:
6148 
6149     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6150     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6151 
6152     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6153 
6154     For Set1, j1[] contains column indices of the nonzeros.
6155     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6156     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6157     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6158 
6159     Similar for Set2.
6160 
6161     This routine merges the two sets of nonzeros row by row and removes repeats.
6162 
6163   Output Parameters: (memory is allocated by the caller)
6164 
6165     i[],j[]: the CSR of the merged matrix, which has m rows.
6166     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6167     imap2[]: similar to imap1[], but for Set2.
6168     Note we order nonzeros row-by-row and from left to right.
6169 */
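/*
   A tiny worked example of the merge (a sketch, not tied to any particular caller): for one row, suppose Set1 has
   sorted column indices j1 = {1, 1, 4} (so jmap1 = {0, 2, 3}) and Set2 has j2 = {2, 4} (jmap2 = {0, 1, 2}).
   The merged row is j = {1, 2, 4}, with imap1 = {0, 2} and imap2 = {1, 2}.
*/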
6170 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6171 {
6172   PetscInt   r, m; /* Row index of mat */
6173   PetscCount t, t1, t2, b1, e1, b2, e2;
6174 
6175   PetscFunctionBegin;
6176   PetscCall(MatGetLocalSize(mat, &m, NULL));
6177   t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6178   i[0]        = 0;
6179   for (r = 0; r < m; r++) { /* Do row by row merging */
6180     b1 = rowBegin1[r];
6181     e1 = rowEnd1[r];
6182     b2 = rowBegin2[r];
6183     e2 = rowEnd2[r];
6184     while (b1 < e1 && b2 < e2) {
6185       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6186         j[t]      = j1[b1];
6187         imap1[t1] = t;
6188         imap2[t2] = t;
6189         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6190         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6191         t1++;
6192         t2++;
6193         t++;
6194       } else if (j1[b1] < j2[b2]) {
6195         j[t]      = j1[b1];
6196         imap1[t1] = t;
6197         b1 += jmap1[t1 + 1] - jmap1[t1];
6198         t1++;
6199         t++;
6200       } else {
6201         j[t]      = j2[b2];
6202         imap2[t2] = t;
6203         b2 += jmap2[t2 + 1] - jmap2[t2];
6204         t2++;
6205         t++;
6206       }
6207     }
6208     /* Merge the remaining in either j1[] or j2[] */
6209     while (b1 < e1) {
6210       j[t]      = j1[b1];
6211       imap1[t1] = t;
6212       b1 += jmap1[t1 + 1] - jmap1[t1];
6213       t1++;
6214       t++;
6215     }
6216     while (b2 < e2) {
6217       j[t]      = j2[b2];
6218       imap2[t2] = t;
6219       b2 += jmap2[t2 + 1] - jmap2[t2];
6220       t2++;
6221       t++;
6222     }
6223     PetscCall(PetscIntCast(t, i + r + 1));
6224   }
6225   PetscFunctionReturn(PETSC_SUCCESS);
6226 }
6227 
6228 /*
6229   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6230 
6231   Input Parameters:
6232     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6233     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6234       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6235 
6236       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6237       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6238 
6239   Output Parameters:
6240     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6241     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6242       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6243       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6244 
6245     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6246       Atot: number of entries belonging to the diagonal block.
6247       Annz: number of unique nonzeros belonging to the diagonal block.
6248       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6249         repeats (i.e., same 'i,j' pair).
6250       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6251         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6255 
6256     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6257 
6258     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6259 */
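/*
  A hedged illustration (made-up values, added for clarity) of the splitting described above. Suppose this
  rank owns rows [4,8) and columns [10,20), and the entries of global row 5 arrive as

    j = [25, 12, 12, 3]   (with the matching perm[] slice)

  After the in-row sort with the diag-shift trick, the diagonal-block columns {12, 12} precede the
  off-diagonal columns {3, 25}, so rowBegin[1] = k, rowMid[1] = k + 2, rowEnd[1] = k + 4 for this row
  (local row index 1). The row contributes Atot += 2 and Annz += 1 (column 12 repeats twice, giving an
  Ajmap gap of 2), plus Btot += 2 and Bnnz += 2, while Aperm/Bperm receive the corresponding perm[] values.
*/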
6260 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6261 {
6262   PetscInt    cstart, cend, rstart, rend, row, col;
6263   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6264   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6265   PetscCount  k, m, p, q, r, s, mid;
6266   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6267 
6268   PetscFunctionBegin;
6269   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6270   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6271   m = rend - rstart;
6272 
6273   /* Skip negative rows */
6274   for (k = 0; k < n; k++)
6275     if (i[k] >= 0) break;
6276 
6277   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6278      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6279   */
6280   while (k < n) {
6281     row = i[k];
6282     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6283     for (s = k; s < n; s++)
6284       if (i[s] != row) break;
6285 
6286     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6287     for (p = k; p < s; p++) {
6288       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6289       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6290     }
6291     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6292     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6293     rowBegin[row - rstart] = k;
6294     rowMid[row - rstart]   = mid;
6295     rowEnd[row - rstart]   = s;
6296 
6297     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6298     Atot += mid - k;
6299     Btot += s - mid;
6300 
6301     /* Count unique nonzeros of this diag row */
6302     for (p = k; p < mid;) {
6303       col = j[p];
6304       do {
6305         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6306         p++;
6307       } while (p < mid && j[p] == col);
6308       Annz++;
6309     }
6310 
6311     /* Count unique nonzeros of this offdiag row */
6312     for (p = mid; p < s;) {
6313       col = j[p];
6314       do {
6315         p++;
6316       } while (p < s && j[p] == col);
6317       Bnnz++;
6318     }
6319     k = s;
6320   }
6321 
6322   /* Allocation according to Atot, Btot, Annz, Bnnz */
6323   PetscCall(PetscMalloc1(Atot, &Aperm));
6324   PetscCall(PetscMalloc1(Btot, &Bperm));
6325   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6326   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6327 
6328   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6329   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6330   for (r = 0; r < m; r++) {
6331     k   = rowBegin[r];
6332     mid = rowMid[r];
6333     s   = rowEnd[r];
6334     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6335     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6336     Atot += mid - k;
6337     Btot += s - mid;
6338 
6339     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6340     for (p = k; p < mid;) {
6341       col = j[p];
6342       q   = p;
6343       do {
6344         p++;
6345       } while (p < mid && j[p] == col);
6346       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6347       Annz++;
6348     }
6349 
6350     for (p = mid; p < s;) {
6351       col = j[p];
6352       q   = p;
6353       do {
6354         p++;
6355       } while (p < s && j[p] == col);
6356       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6357       Bnnz++;
6358     }
6359   }
6360   /* Output */
6361   *Aperm_ = Aperm;
6362   *Annz_  = Annz;
6363   *Atot_  = Atot;
6364   *Ajmap_ = Ajmap;
6365   *Bperm_ = Bperm;
6366   *Bnnz_  = Bnnz;
6367   *Btot_  = Btot;
6368   *Bjmap_ = Bjmap;
6369   PetscFunctionReturn(PETSC_SUCCESS);
6370 }
6371 
6372 /*
6373   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6374 
6375   Input Parameters:
6376     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6377     nnz:  number of unique nonzeros in the merged matrix
6378     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6379     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6380 
6381   Output Parameter: (memory is allocated by the caller)
6382     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6383 
6384   Example:
6385     nnz1 = 4
6386     nnz  = 6
6387     imap = [1,3,4,5]
6388     jmap = [0,3,5,6,7]
6389    then,
6390     jmap_new = [0,0,3,3,5,6,7]
6391 */
6392 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6393 {
6394   PetscCount k, p;
6395 
6396   PetscFunctionBegin;
6397   jmap_new[0] = 0;
6398   p           = nnz;                /* p loops over jmap_new[] backwards */
6399   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6400     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6401   }
6402   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6403   PetscFunctionReturn(PETSC_SUCCESS);
6404 }
6405 
6406 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6407 {
6408   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6409 
6410   PetscFunctionBegin;
6411   PetscCall(PetscSFDestroy(&coo->sf));
6412   PetscCall(PetscFree(coo->Aperm1));
6413   PetscCall(PetscFree(coo->Bperm1));
6414   PetscCall(PetscFree(coo->Ajmap1));
6415   PetscCall(PetscFree(coo->Bjmap1));
6416   PetscCall(PetscFree(coo->Aimap2));
6417   PetscCall(PetscFree(coo->Bimap2));
6418   PetscCall(PetscFree(coo->Aperm2));
6419   PetscCall(PetscFree(coo->Bperm2));
6420   PetscCall(PetscFree(coo->Ajmap2));
6421   PetscCall(PetscFree(coo->Bjmap2));
6422   PetscCall(PetscFree(coo->Cperm1));
6423   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6424   PetscCall(PetscFree(coo));
6425   PetscFunctionReturn(PETSC_SUCCESS);
6426 }
6427 
6428 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6429 {
6430   MPI_Comm             comm;
6431   PetscMPIInt          rank, size;
6432   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6433   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6434   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6435   PetscContainer       container;
6436   MatCOOStruct_MPIAIJ *coo;
6437 
6438   PetscFunctionBegin;
6439   PetscCall(PetscFree(mpiaij->garray));
6440   PetscCall(VecDestroy(&mpiaij->lvec));
6441 #if defined(PETSC_USE_CTABLE)
6442   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6443 #else
6444   PetscCall(PetscFree(mpiaij->colmap));
6445 #endif
6446   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6447   mat->assembled     = PETSC_FALSE;
6448   mat->was_assembled = PETSC_FALSE;
6449 
6450   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6451   PetscCallMPI(MPI_Comm_size(comm, &size));
6452   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6453   PetscCall(PetscLayoutSetUp(mat->rmap));
6454   PetscCall(PetscLayoutSetUp(mat->cmap));
6455   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6456   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6457   PetscCall(MatGetLocalSize(mat, &m, &n));
6458   PetscCall(MatGetSize(mat, &M, &N));
6459 
6460   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6461   /* entries come first, then local rows, then remote rows.                     */
6462   PetscCount n1 = coo_n, *perm1;
6463   PetscInt  *i1 = coo_i, *j1 = coo_j;
6464 
6465   PetscCall(PetscMalloc1(n1, &perm1));
6466   for (k = 0; k < n1; k++) perm1[k] = k;
6467 
6468   /* Manipulate indices so that entries with negative row or col indices will have smallest
6469      row indices, local entries will have greater but negative row indices, and remote entries
6470      will have positive row indices.
6471   */
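  /* A hedged numeric illustration (not part of the original source) of the encoding above: with
     rstart = 100 and rend = 200, an ignored entry gets row PETSC_INT_MIN, local row 150 becomes
     150 - PETSC_INT_MAX (negative, but still greater than PETSC_INT_MIN), and remote row 300 keeps 300.
     Sorting by row therefore orders entries as ignored < local < remote; the loop further below adds
     PETSC_INT_MAX back to restore local rows to [rstart, rend). */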
6472   for (k = 0; k < n1; k++) {
6473     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6474     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6475     else {
6476       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but entries are being inserted into remote rows");
6477       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6478     }
6479   }
6480 
6481   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6482   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6483 
6484   /* Advance k to the first entry we need to take care of */
6485   for (k = 0; k < n1; k++)
6486     if (i1[k] > PETSC_INT_MIN) break;
6487   PetscCount i1start = k;
6488 
6489   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6490   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows */
6491 
6492   /*           Send remote rows to their owner                                  */
6493   /* Find which rows should be sent to which remote ranks*/
6494   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6495   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6496   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6497   const PetscInt *ranges;
6498   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6499 
6500   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6501   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6502   for (k = rem; k < n1;) {
6503     PetscMPIInt owner;
6504     PetscInt    firstRow, lastRow;
6505 
6506     /* Locate a row range */
6507     firstRow = i1[k]; /* first row of this owner */
6508     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6509     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6510 
6511     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6512     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6513 
6514     /* All entries in [k,p) belong to this remote owner */
6515     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6516       PetscMPIInt *sendto2;
6517       PetscInt    *nentries2;
6518       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6519 
6520       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6521       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6522       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6523       PetscCall(PetscFree2(sendto, nentries));
6524       sendto   = sendto2;
6525       nentries = nentries2;
6526       maxNsend = maxNsend2;
6527     }
6528     sendto[nsend] = owner;
6529     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6530     nsend++;
6531     k = p;
6532   }
6533 
6534   /* Build 1st SF to know offsets on remote to send data */
6535   PetscSF      sf1;
6536   PetscInt     nroots = 1, nroots2 = 0;
6537   PetscInt     nleaves = nsend, nleaves2 = 0;
6538   PetscInt    *offsets;
6539   PetscSFNode *iremote;
6540 
6541   PetscCall(PetscSFCreate(comm, &sf1));
6542   PetscCall(PetscMalloc1(nsend, &iremote));
6543   PetscCall(PetscMalloc1(nsend, &offsets));
6544   for (k = 0; k < nsend; k++) {
6545     iremote[k].rank  = sendto[k];
6546     iremote[k].index = 0;
6547     nleaves2 += nentries[k];
6548     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6549   }
6550   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6551   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6552   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, the offsets[] check below will catch it */
6553   PetscCall(PetscSFDestroy(&sf1));
6554   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6555 
6556   /* Build 2nd SF to send remote COOs to their owner */
6557   PetscSF sf2;
6558   nroots  = nroots2;
6559   nleaves = nleaves2;
6560   PetscCall(PetscSFCreate(comm, &sf2));
6561   PetscCall(PetscSFSetFromOptions(sf2));
6562   PetscCall(PetscMalloc1(nleaves, &iremote));
6563   p = 0;
6564   for (k = 0; k < nsend; k++) {
6565     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6566     for (q = 0; q < nentries[k]; q++, p++) {
6567       iremote[p].rank = sendto[k];
6568       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6569     }
6570   }
6571   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6572 
6573   /* Send the remote COOs to their owner */
6574   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6575   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6576   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6577   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6578   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6579   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6580   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6581   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6582   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6583   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6584   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6585 
6586   PetscCall(PetscFree(offsets));
6587   PetscCall(PetscFree2(sendto, nentries));
6588 
6589   /* Sort received COOs by row along with the permutation array     */
6590   for (k = 0; k < n2; k++) perm2[k] = k;
6591   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6592 
6593   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6594   PetscCount *Cperm1;
6595   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6596   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6597   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6598   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6599 
6600   /* Support for HYPRE matrices, kind of a hack.
6601      Swap min column with diagonal so that diagonal values will go first */
6602   PetscBool hypre;
6603   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6604   if (hypre) {
6605     PetscInt *minj;
6606     PetscBT   hasdiag;
6607 
6608     PetscCall(PetscBTCreate(m, &hasdiag));
6609     PetscCall(PetscMalloc1(m, &minj));
6610     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6611     for (k = i1start; k < rem; k++) {
6612       if (j1[k] < cstart || j1[k] >= cend) continue;
6613       const PetscInt rindex = i1[k] - rstart;
6614       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6615       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6616     }
6617     for (k = 0; k < n2; k++) {
6618       if (j2[k] < cstart || j2[k] >= cend) continue;
6619       const PetscInt rindex = i2[k] - rstart;
6620       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6621       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6622     }
6623     for (k = i1start; k < rem; k++) {
6624       const PetscInt rindex = i1[k] - rstart;
6625       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6626       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6627       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6628     }
6629     for (k = 0; k < n2; k++) {
6630       const PetscInt rindex = i2[k] - rstart;
6631       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6632       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6633       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6634     }
6635     PetscCall(PetscBTDestroy(&hasdiag));
6636     PetscCall(PetscFree(minj));
6637   }
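  /* A hedged example (made-up values) of the column swap above: with rstart = cstart = 5, global row 7
     (local row 2) having diagonal-block columns {6, 7, 9}, minj = 6 and the row has a diagonal entry
     (column 7). The relabeling maps column 6 -> 7 and column 7 -> 6, so after the per-row sort in
     MatSplitEntries_Internal() the diagonal value lands in the first slot of the row, which is the
     ordering hypre expects. */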
6638 
6639   /* Split local COOs and received COOs into diag/offdiag portions */
6640   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6641   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6642   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6643   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6644   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6645   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6646 
6647   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6648   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6649   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6650   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6651 
6652   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6653   PetscInt *Ai, *Bi;
6654   PetscInt *Aj, *Bj;
6655 
6656   PetscCall(PetscMalloc1(m + 1, &Ai));
6657   PetscCall(PetscMalloc1(m + 1, &Bi));
6658   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6659   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6660 
6661   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6662   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6663   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6664   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6665   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6666 
6667   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6668   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6669 
6670   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6671   /* expect nonzeros in A/B most likely have local contributing entries        */
6672   PetscInt    Annz = Ai[m];
6673   PetscInt    Bnnz = Bi[m];
6674   PetscCount *Ajmap1_new, *Bjmap1_new;
6675 
6676   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6677   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6678 
6679   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6680   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6681 
6682   PetscCall(PetscFree(Aimap1));
6683   PetscCall(PetscFree(Ajmap1));
6684   PetscCall(PetscFree(Bimap1));
6685   PetscCall(PetscFree(Bjmap1));
6686   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6687   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6688   PetscCall(PetscFree(perm1));
6689   PetscCall(PetscFree3(i2, j2, perm2));
6690 
6691   Ajmap1 = Ajmap1_new;
6692   Bjmap1 = Bjmap1_new;
6693 
6694   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6695   if (Annz < Annz1 + Annz2) {
6696     PetscInt *Aj_new;
6697     PetscCall(PetscMalloc1(Annz, &Aj_new));
6698     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6699     PetscCall(PetscFree(Aj));
6700     Aj = Aj_new;
6701   }
6702 
6703   if (Bnnz < Bnnz1 + Bnnz2) {
6704     PetscInt *Bj_new;
6705     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6706     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6707     PetscCall(PetscFree(Bj));
6708     Bj = Bj_new;
6709   }
6710 
6711   /* Create new submatrices for on-process and off-process coupling                  */
6712   PetscScalar     *Aa, *Ba;
6713   MatType          rtype;
6714   Mat_SeqAIJ      *a, *b;
6715   PetscObjectState state;
6716   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6717   PetscCall(PetscCalloc1(Bnnz, &Ba));
6718   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6719   if (cstart) {
6720     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6721   }
6722 
6723   PetscCall(MatGetRootType_Private(mat, &rtype));
6724 
6725   MatSeqXAIJGetOptions_Private(mpiaij->A);
6726   PetscCall(MatDestroy(&mpiaij->A));
6727   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6728   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6729   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6730 
6731   MatSeqXAIJGetOptions_Private(mpiaij->B);
6732   PetscCall(MatDestroy(&mpiaij->B));
6733   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6734   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6735   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6736 
6737   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6738   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6739   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6740   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6741 
6742   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6743   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6744   a->free_a  = PETSC_TRUE;
6745   a->free_ij = PETSC_TRUE;
6746   b->free_a  = PETSC_TRUE;
6747   b->free_ij = PETSC_TRUE;
6748   a->maxnz   = a->nz;
6749   b->maxnz   = b->nz;
6750 
6751   /* conversion must happen AFTER multiply setup */
6752   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6753   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6754   PetscCall(VecDestroy(&mpiaij->lvec));
6755   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6756 
6757   // Put the COO struct in a container and then attach that to the matrix
6758   PetscCall(PetscMalloc1(1, &coo));
6759   coo->n       = coo_n;
6760   coo->sf      = sf2;
6761   coo->sendlen = nleaves;
6762   coo->recvlen = nroots;
6763   coo->Annz    = Annz;
6764   coo->Bnnz    = Bnnz;
6765   coo->Annz2   = Annz2;
6766   coo->Bnnz2   = Bnnz2;
6767   coo->Atot1   = Atot1;
6768   coo->Atot2   = Atot2;
6769   coo->Btot1   = Btot1;
6770   coo->Btot2   = Btot2;
6771   coo->Ajmap1  = Ajmap1;
6772   coo->Aperm1  = Aperm1;
6773   coo->Bjmap1  = Bjmap1;
6774   coo->Bperm1  = Bperm1;
6775   coo->Aimap2  = Aimap2;
6776   coo->Ajmap2  = Ajmap2;
6777   coo->Aperm2  = Aperm2;
6778   coo->Bimap2  = Bimap2;
6779   coo->Bjmap2  = Bjmap2;
6780   coo->Bperm2  = Bperm2;
6781   coo->Cperm1  = Cperm1;
6782   // Allocate the send/recv buffers during preallocation; if unused, they have zero cost on the host
6783   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6784   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6785   PetscCall(PetscContainerSetPointer(container, coo));
6786   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6787   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6788   PetscCall(PetscContainerDestroy(&container));
6789   PetscFunctionReturn(PETSC_SUCCESS);
6790 }
6791 
6792 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6793 {
6794   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6795   Mat                  A = mpiaij->A, B = mpiaij->B;
6796   PetscScalar         *Aa, *Ba;
6797   PetscScalar         *sendbuf, *recvbuf;
6798   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6799   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6800   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6801   const PetscCount    *Cperm1;
6802   PetscContainer       container;
6803   MatCOOStruct_MPIAIJ *coo;
6804 
6805   PetscFunctionBegin;
6806   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6807   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6808   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6809   sendbuf = coo->sendbuf;
6810   recvbuf = coo->recvbuf;
6811   Ajmap1  = coo->Ajmap1;
6812   Ajmap2  = coo->Ajmap2;
6813   Aimap2  = coo->Aimap2;
6814   Bjmap1  = coo->Bjmap1;
6815   Bjmap2  = coo->Bjmap2;
6816   Bimap2  = coo->Bimap2;
6817   Aperm1  = coo->Aperm1;
6818   Aperm2  = coo->Aperm2;
6819   Bperm1  = coo->Bperm1;
6820   Bperm2  = coo->Bperm2;
6821   Cperm1  = coo->Cperm1;
6822 
6823   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6824   PetscCall(MatSeqAIJGetArray(B, &Ba));
6825 
6826   /* Pack entries to be sent to remote */
6827   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6828 
6829   /* Send remote entries to their owner and overlap the communication with local computation */
6830   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6831   /* Add local entries to A and B */
6832   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6833     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6834     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6835     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6836   }
6837   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6838     PetscScalar sum = 0.0;
6839     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6840     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6841   }
6842   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6843 
6844   /* Add received remote entries to A and B */
6845   for (PetscCount i = 0; i < coo->Annz2; i++) {
6846     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6847   }
6848   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6849     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6850   }
6851   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6852   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6853   PetscFunctionReturn(PETSC_SUCCESS);
6854 }
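/*
  A hedged usage sketch (illustration only; N and the Mat A are assumed to be declared elsewhere, and error
  handling is trimmed) of the user-level COO assembly path served by the two routines above: the sparsity
  pattern, including repeated and off-process entries, is communicated once in MatSetPreallocationCOO(),
  after which MatSetValuesCOO() may be called repeatedly with new values.

    PetscInt    i[] = {0, 0, 1};             // global row indices; repeats are allowed
    PetscInt    j[] = {0, 2, 1};             // global column indices
    PetscScalar v[] = {1.0, 2.0, 3.0};       // values matching (i[k], j[k])

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
    PetscCall(MatSetType(A, MATMPIAIJ));
    PetscCall(MatSetPreallocationCOO(A, 3, i, j));    // builds and attaches the MatCOOStruct used above
    PetscCall(MatSetValuesCOO(A, v, ADD_VALUES));     // sums repeated entries into the matrix
*/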
6855 
6856 /*MC
6857    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6858 
6859    Options Database Keys:
6860 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6861 
6862    Level: beginner
6863 
6864    Notes:
6865    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6866     in this case the values associated with the rows and columns one passes in are set to zero
6867     in the matrix
6868 
6869     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6870     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6871 
6872 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6873 M*/
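/* A hedged sketch (illustration only; A and N assumed declared) of selecting this type from the options
   database, as described in the manpage above:

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
     PetscCall(MatSetFromOptions(A));   // picks up -mat_type mpiaij from the options database
*/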
6874 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6875 {
6876   Mat_MPIAIJ *b;
6877   PetscMPIInt size;
6878 
6879   PetscFunctionBegin;
6880   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6881 
6882   PetscCall(PetscNew(&b));
6883   B->data       = (void *)b;
6884   B->ops[0]     = MatOps_Values;
6885   B->assembled  = PETSC_FALSE;
6886   B->insertmode = NOT_SET_VALUES;
6887   b->size       = size;
6888 
6889   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6890 
6891   /* build cache for off array entries formed */
6892   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6893 
6894   b->donotstash  = PETSC_FALSE;
6895   b->colmap      = NULL;
6896   b->garray      = NULL;
6897   b->roworiented = PETSC_TRUE;
6898 
6899   /* stuff used for matrix vector multiply */
6900   b->lvec  = NULL;
6901   b->Mvctx = NULL;
6902 
6903   /* stuff for MatGetRow() */
6904   b->rowindices   = NULL;
6905   b->rowvalues    = NULL;
6906   b->getrowactive = PETSC_FALSE;
6907 
6908   /* flexible pointer used in CUSPARSE classes */
6909   b->spptr = NULL;
6910 
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6913   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6915   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6916   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6917   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6919   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6922 #if defined(PETSC_HAVE_CUDA)
6923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6924 #endif
6925 #if defined(PETSC_HAVE_HIP)
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6927 #endif
6928 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6930 #endif
6931 #if defined(PETSC_HAVE_MKL_SPARSE)
6932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6933 #endif
6934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6938 #if defined(PETSC_HAVE_ELEMENTAL)
6939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6940 #endif
6941 #if defined(PETSC_HAVE_SCALAPACK)
6942   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6943 #endif
6944   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6945   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6946 #if defined(PETSC_HAVE_HYPRE)
6947   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6948   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6949 #endif
6950   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6951   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6952   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6953   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6954   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6955   PetscFunctionReturn(PETSC_SUCCESS);
6956 }
6957 
6958 /*@
6959   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6960   and "off-diagonal" part of the matrix in CSR format.
6961 
6962   Collective
6963 
6964   Input Parameters:
6965 + comm - MPI communicator
6966 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6967 . n    - This value should be the same as the local size used in creating the
6968          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6969          calculated if `N` is given) For square matrices `n` is almost always `m`.
6970 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6971 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6972 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6973 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6974 . a    - matrix values
6975 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6976 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6977 - oa   - matrix values
6978 
6979   Output Parameter:
6980 . mat - the matrix
6981 
6982   Level: advanced
6983 
6984   Notes:
6985   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6986   must free the arrays once the matrix has been destroyed and not before.
6987 
6988   The `i` and `j` indices are 0 based
6989 
6990   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6991 
6992   This sets local rows and cannot be used to set off-processor values.
6993 
6994   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6995   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6996   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6997   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6998   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6999   communication if it is known that only local entries will be set.
7000 
7001 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
7002           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
7003 @*/
7004 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
7005 {
7006   Mat_MPIAIJ *maij;
7007 
7008   PetscFunctionBegin;
7009   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
7010   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
7011   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
7012   PetscCall(MatCreate(comm, mat));
7013   PetscCall(MatSetSizes(*mat, m, n, M, N));
7014   PetscCall(MatSetType(*mat, MATMPIAIJ));
7015   maij = (Mat_MPIAIJ *)(*mat)->data;
7016 
7017   (*mat)->preallocated = PETSC_TRUE;
7018 
7019   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7020   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7021 
7022   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7023   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7024 
7025   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7026   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7027   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7028   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7029   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7030   PetscFunctionReturn(PETSC_SUCCESS);
7031 }
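/*
  A hedged worked example (made-up values) for MatCreateMPIAIJWithSplitArrays() above. Consider a 4x4 matrix
  on 2 ranks with 2 rows and 2 columns per rank; on rank 0 the local rows are

      [ 1 0 | 5 0 ]
      [ 0 2 | 0 6 ]

  The "diagonal" block is passed with local column indices
      i  = {0, 1, 2},  j  = {0, 1},  a  = {1.0, 2.0}
  and the "off-diagonal" block with global column indices
      oi = {0, 1, 2},  oj = {2, 3},  oa = {5.0, 6.0}
  As noted above, none of the six arrays are copied, so they must remain valid until the matrix is destroyed.
*/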
7032 
7033 typedef struct {
7034   Mat       *mp;    /* intermediate products */
7035   PetscBool *mptmp; /* is the intermediate product temporary? */
7036   PetscInt   cp;    /* number of intermediate products */
7037 
7038   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7039   PetscInt    *startsj_s, *startsj_r;
7040   PetscScalar *bufa;
7041   Mat          P_oth;
7042 
7043   /* may take advantage of merging product->B */
7044   Mat Bloc; /* B-local by merging diag and off-diag */
7045 
7046   /* cusparse does not support splitting the symbolic and numeric phases.
7047      When api_user is true, we don't need to update the numerical values
7048      of the temporary storage */
7049   PetscBool reusesym;
7050 
7051   /* support for COO values insertion */
7052   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7053   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7054   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7055   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7056   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7057   PetscMemType mtype;
7058 
7059   /* customization */
7060   PetscBool abmerge;
7061   PetscBool P_oth_bind;
7062 } MatMatMPIAIJBACKEND;
7063 
7064 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7065 {
7066   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7067   PetscInt             i;
7068 
7069   PetscFunctionBegin;
7070   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7071   PetscCall(PetscFree(mmdata->bufa));
7072   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7073   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7074   PetscCall(MatDestroy(&mmdata->P_oth));
7075   PetscCall(MatDestroy(&mmdata->Bloc));
7076   PetscCall(PetscSFDestroy(&mmdata->sf));
7077   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7078   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7079   PetscCall(PetscFree(mmdata->own[0]));
7080   PetscCall(PetscFree(mmdata->own));
7081   PetscCall(PetscFree(mmdata->off[0]));
7082   PetscCall(PetscFree(mmdata->off));
7083   PetscCall(PetscFree(mmdata));
7084   PetscFunctionReturn(PETSC_SUCCESS);
7085 }
7086 
7087 /* Copy selected n entries with indices in idx[] of A to v[].
7088    If idx is NULL, copy the whole data array of A to v[]
7089  */
7090 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7091 {
7092   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7093 
7094   PetscFunctionBegin;
7095   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7096   if (f) {
7097     PetscCall((*f)(A, n, idx, v));
7098   } else {
7099     const PetscScalar *vv;
7100 
7101     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7102     if (n && idx) {
7103       PetscScalar    *w  = v;
7104       const PetscInt *oi = idx;
7105       PetscInt        j;
7106 
7107       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7108     } else {
7109       PetscCall(PetscArraycpy(v, vv, n));
7110     }
7111     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7112   }
7113   PetscFunctionReturn(PETSC_SUCCESS);
7114 }
7115 
7116 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7117 {
7118   MatMatMPIAIJBACKEND *mmdata;
7119   PetscInt             i, n_d, n_o;
7120 
7121   PetscFunctionBegin;
7122   MatCheckProduct(C, 1);
7123   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7124   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7125   if (!mmdata->reusesym) { /* update temporary matrices */
7126     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7127     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7128   }
7129   mmdata->reusesym = PETSC_FALSE;
7130 
7131   for (i = 0; i < mmdata->cp; i++) {
7132     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7133     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7134   }
7135   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7136     PetscInt noff;
7137 
7138     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7139     if (mmdata->mptmp[i]) continue;
7140     if (noff) {
7141       PetscInt nown;
7142 
7143       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7144       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7145       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7146       n_o += noff;
7147       n_d += nown;
7148     } else {
7149       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7150 
7151       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7152       n_d += mm->nz;
7153     }
7154   }
7155   if (mmdata->hasoffproc) { /* offprocess insertion */
7156     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7157     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7158   }
7159   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7160   PetscFunctionReturn(PETSC_SUCCESS);
7161 }
7162 
7163 /* Support for Pt * A, A * P, or Pt * A * P */
7164 #define MAX_NUMBER_INTERMEDIATE 4
7165 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7166 {
7167   Mat_Product           *product = C->product;
7168   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7169   Mat_MPIAIJ            *a, *p;
7170   MatMatMPIAIJBACKEND   *mmdata;
7171   ISLocalToGlobalMapping P_oth_l2g = NULL;
7172   IS                     glob      = NULL;
7173   const char            *prefix;
7174   char                   pprefix[256];
7175   const PetscInt        *globidx, *P_oth_idx;
7176   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7177   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7178   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7179                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7180                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7181   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7182 
7183   MatProductType ptype;
7184   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7185   PetscMPIInt    size;
7186 
7187   PetscFunctionBegin;
7188   MatCheckProduct(C, 1);
7189   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7190   ptype = product->type;
7191   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7192     ptype                                          = MATPRODUCT_AB;
7193     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7194   }
7195   switch (ptype) {
7196   case MATPRODUCT_AB:
7197     A          = product->A;
7198     P          = product->B;
7199     m          = A->rmap->n;
7200     n          = P->cmap->n;
7201     M          = A->rmap->N;
7202     N          = P->cmap->N;
7203     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7204     break;
7205   case MATPRODUCT_AtB:
7206     P          = product->A;
7207     A          = product->B;
7208     m          = P->cmap->n;
7209     n          = A->cmap->n;
7210     M          = P->cmap->N;
7211     N          = A->cmap->N;
7212     hasoffproc = PETSC_TRUE;
7213     break;
7214   case MATPRODUCT_PtAP:
7215     A          = product->A;
7216     P          = product->B;
7217     m          = P->cmap->n;
7218     n          = P->cmap->n;
7219     M          = P->cmap->N;
7220     N          = P->cmap->N;
7221     hasoffproc = PETSC_TRUE;
7222     break;
7223   default:
7224     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7225   }
7226   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7227   if (size == 1) hasoffproc = PETSC_FALSE;
7228 
7229   /* defaults */
7230   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7231     mp[i]    = NULL;
7232     mptmp[i] = PETSC_FALSE;
7233     rmapt[i] = -1;
7234     cmapt[i] = -1;
7235     rmapa[i] = NULL;
7236     cmapa[i] = NULL;
7237   }
7238 
7239   /* customization */
7240   PetscCall(PetscNew(&mmdata));
7241   mmdata->reusesym = product->api_user;
7242   if (ptype == MATPRODUCT_AB) {
7243     if (product->api_user) {
7244       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7245       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7246       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7247       PetscOptionsEnd();
7248     } else {
7249       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7250       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7251       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7252       PetscOptionsEnd();
7253     }
7254   } else if (ptype == MATPRODUCT_PtAP) {
7255     if (product->api_user) {
7256       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7257       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7258       PetscOptionsEnd();
7259     } else {
7260       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7261       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7262       PetscOptionsEnd();
7263     }
7264   }
7265   a = (Mat_MPIAIJ *)A->data;
7266   p = (Mat_MPIAIJ *)P->data;
7267   PetscCall(MatSetSizes(C, m, n, M, N));
7268   PetscCall(PetscLayoutSetUp(C->rmap));
7269   PetscCall(PetscLayoutSetUp(C->cmap));
7270   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7271   PetscCall(MatGetOptionsPrefix(C, &prefix));
7272 
7273   cp = 0;
7274   switch (ptype) {
7275   case MATPRODUCT_AB: /* A * P */
7276     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7277 
7278     /* A_diag * P_local (merged or not) */
7279     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7280       /* P is product->B */
7281       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7282       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7283       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7284       PetscCall(MatProductSetFill(mp[cp], product->fill));
7285       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7286       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7287       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7288       mp[cp]->product->api_user = product->api_user;
7289       PetscCall(MatProductSetFromOptions(mp[cp]));
7290       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7291       PetscCall(ISGetIndices(glob, &globidx));
7292       rmapt[cp] = 1;
7293       cmapt[cp] = 2;
7294       cmapa[cp] = globidx;
7295       mptmp[cp] = PETSC_FALSE;
7296       cp++;
7297     } else { /* A_diag * P_diag and A_diag * P_off */
7298       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7299       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7300       PetscCall(MatProductSetFill(mp[cp], product->fill));
7301       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7302       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7303       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7304       mp[cp]->product->api_user = product->api_user;
7305       PetscCall(MatProductSetFromOptions(mp[cp]));
7306       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7307       rmapt[cp] = 1;
7308       cmapt[cp] = 1;
7309       mptmp[cp] = PETSC_FALSE;
7310       cp++;
7311       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7312       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7313       PetscCall(MatProductSetFill(mp[cp], product->fill));
7314       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7315       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7316       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7317       mp[cp]->product->api_user = product->api_user;
7318       PetscCall(MatProductSetFromOptions(mp[cp]));
7319       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7320       rmapt[cp] = 1;
7321       cmapt[cp] = 2;
7322       cmapa[cp] = p->garray;
7323       mptmp[cp] = PETSC_FALSE;
7324       cp++;
7325     }
7326 
7327     /* A_off * P_other */
7328     if (mmdata->P_oth) {
7329       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7330       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7331       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7332       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7333       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7334       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7335       PetscCall(MatProductSetFill(mp[cp], product->fill));
7336       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7337       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7338       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7339       mp[cp]->product->api_user = product->api_user;
7340       PetscCall(MatProductSetFromOptions(mp[cp]));
7341       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7342       rmapt[cp] = 1;
7343       cmapt[cp] = 2;
7344       cmapa[cp] = P_oth_idx;
7345       mptmp[cp] = PETSC_FALSE;
7346       cp++;
7347     }
7348     break;
7349 
7350   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7351     /* A is product->B */
7352     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7353     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7354       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7355       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7356       PetscCall(MatProductSetFill(mp[cp], product->fill));
7357       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7358       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7359       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7360       mp[cp]->product->api_user = product->api_user;
7361       PetscCall(MatProductSetFromOptions(mp[cp]));
7362       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7363       PetscCall(ISGetIndices(glob, &globidx));
7364       rmapt[cp] = 2;
7365       rmapa[cp] = globidx;
7366       cmapt[cp] = 2;
7367       cmapa[cp] = globidx;
7368       mptmp[cp] = PETSC_FALSE;
7369       cp++;
7370     } else {
7371       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7372       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7373       PetscCall(MatProductSetFill(mp[cp], product->fill));
7374       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7375       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7376       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7377       mp[cp]->product->api_user = product->api_user;
7378       PetscCall(MatProductSetFromOptions(mp[cp]));
7379       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7380       PetscCall(ISGetIndices(glob, &globidx));
7381       rmapt[cp] = 1;
7382       cmapt[cp] = 2;
7383       cmapa[cp] = globidx;
7384       mptmp[cp] = PETSC_FALSE;
7385       cp++;
7386       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7387       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7388       PetscCall(MatProductSetFill(mp[cp], product->fill));
7389       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7390       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7391       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7392       mp[cp]->product->api_user = product->api_user;
7393       PetscCall(MatProductSetFromOptions(mp[cp]));
7394       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7395       rmapt[cp] = 2;
7396       rmapa[cp] = p->garray;
7397       cmapt[cp] = 2;
7398       cmapa[cp] = globidx;
7399       mptmp[cp] = PETSC_FALSE;
7400       cp++;
7401     }
7402     break;
7403   case MATPRODUCT_PtAP:
7404     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7405     /* P is product->B */
7406     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7407     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7408     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7409     PetscCall(MatProductSetFill(mp[cp], product->fill));
7410     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7411     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7412     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7413     mp[cp]->product->api_user = product->api_user;
7414     PetscCall(MatProductSetFromOptions(mp[cp]));
7415     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7416     PetscCall(ISGetIndices(glob, &globidx));
7417     rmapt[cp] = 2;
7418     rmapa[cp] = globidx;
7419     cmapt[cp] = 2;
7420     cmapa[cp] = globidx;
7421     mptmp[cp] = PETSC_FALSE;
7422     cp++;
7423     if (mmdata->P_oth) {
7424       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7425       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7426       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7427       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7428       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7429       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7430       PetscCall(MatProductSetFill(mp[cp], product->fill));
7431       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7432       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7433       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7434       mp[cp]->product->api_user = product->api_user;
7435       PetscCall(MatProductSetFromOptions(mp[cp]));
7436       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7437       mptmp[cp] = PETSC_TRUE;
7438       cp++;
7439       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7440       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7441       PetscCall(MatProductSetFill(mp[cp], product->fill));
7442       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7443       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7444       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7445       mp[cp]->product->api_user = product->api_user;
7446       PetscCall(MatProductSetFromOptions(mp[cp]));
7447       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7448       rmapt[cp] = 2;
7449       rmapa[cp] = globidx;
7450       cmapt[cp] = 2;
7451       cmapa[cp] = P_oth_idx;
7452       mptmp[cp] = PETSC_FALSE;
7453       cp++;
7454     }
7455     break;
7456   default:
7457     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7458   }
7459   /* sanity check */
7460   if (size > 1)
7461     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7462 
7463   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7464   for (i = 0; i < cp; i++) {
7465     mmdata->mp[i]    = mp[i];
7466     mmdata->mptmp[i] = mptmp[i];
7467   }
7468   mmdata->cp             = cp;
7469   C->product->data       = mmdata;
7470   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7471   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7472 
7473   /* memory type */
7474   mmdata->mtype = PETSC_MEMTYPE_HOST;
7475   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7476   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7477   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7478   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7479   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7480   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7481 
7482   /* prepare coo coordinates for values insertion */
7483 
7484   /* count total nonzeros of those intermediate seqaij Mats
7485     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7486     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7487     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7488   */
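  /*
    Hedged worked example (illustrative numbers, not from an actual run): suppose mp[0] is kept
    (mptmp[0] == PETSC_FALSE) with rmapt[0] == 1 and 7 stored nonzeros, and mp[1] is kept with
    rmapt[1] == 2, its two local rows mapping to global rows {3, 10} with 4 and 2 nonzeros,
    while this process owns global rows [0, 8). The loop below then yields
    ncoo_d = 7 (all of mp[0]), ncoo_oown = 4 (row 3 is owned here), and ncoo_o = 2 (row 10 is offproc).
  */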
7489   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7490     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7491     if (mptmp[cp]) continue;
7492     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7493       const PetscInt *rmap = rmapa[cp];
7494       const PetscInt  mr   = mp[cp]->rmap->n;
7495       const PetscInt  rs   = C->rmap->rstart;
7496       const PetscInt  re   = C->rmap->rend;
7497       const PetscInt *ii   = mm->i;
7498       for (i = 0; i < mr; i++) {
7499         const PetscInt gr = rmap[i];
7500         const PetscInt nz = ii[i + 1] - ii[i];
7501         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7502         else ncoo_oown += nz;                  /* this row is local */
7503       }
7504     } else ncoo_d += mm->nz;
7505   }
7506 
7507   /*
7508     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7509 
7510     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into this process by other procs.
7511 
7512     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7513 
7514     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert on other processes
7515     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
7516     Thus, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other processes.
7517 
7518     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7519     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros this process will receive.
7520   */
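  /*
    Illustrative sketch (an assumption about how the numeric phase consumes these arrays, not code
    executed here): off[] behaves like a CSR row pointer into one shared index array, so the nonzeros
    of mp[q] destined for other processes (when any exist) could be walked as

      for (PetscInt q = 0; q < mmdata->cp; q++)
        for (PetscInt *idx = mmdata->off[q]; idx < mmdata->off[q + 1]; idx++)
          pack(*idx); // pack() is hypothetical; *idx is the position in mp[q]'s value array of a nonzero to send

    and analogously own[] lists the positions of nonzeros that are inserted locally.
  */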
7521   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7522   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7523 
7524   /* gather (i,j) of nonzeros inserted by remote procs */
7525   if (hasoffproc) {
7526     PetscSF  msf;
7527     PetscInt ncoo2, *coo_i2, *coo_j2;
7528 
7529     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7530     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7531     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7532 
7533     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7534       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7535       PetscInt   *idxoff = mmdata->off[cp];
7536       PetscInt   *idxown = mmdata->own[cp];
7537       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7538         const PetscInt *rmap = rmapa[cp];
7539         const PetscInt *cmap = cmapa[cp];
7540         const PetscInt *ii   = mm->i;
7541         PetscInt       *coi  = coo_i + ncoo_o;
7542         PetscInt       *coj  = coo_j + ncoo_o;
7543         const PetscInt  mr   = mp[cp]->rmap->n;
7544         const PetscInt  rs   = C->rmap->rstart;
7545         const PetscInt  re   = C->rmap->rend;
7546         const PetscInt  cs   = C->cmap->rstart;
7547         for (i = 0; i < mr; i++) {
7548           const PetscInt *jj = mm->j + ii[i];
7549           const PetscInt  gr = rmap[i];
7550           const PetscInt  nz = ii[i + 1] - ii[i];
7551           if (gr < rs || gr >= re) { /* this is an offproc row */
7552             for (j = ii[i]; j < ii[i + 1]; j++) {
7553               *coi++    = gr;
7554               *idxoff++ = j;
7555             }
7556             if (!cmapt[cp]) { /* already global */
7557               for (j = 0; j < nz; j++) *coj++ = jj[j];
7558             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7559               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7560             } else { /* type-2, local to global for sparse columns */
7561               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7562             }
7563             ncoo_o += nz;
7564           } else { /* this is a local row */
7565             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7566           }
7567         }
7568       }
7569       mmdata->off[cp + 1] = idxoff;
7570       mmdata->own[cp + 1] = idxown;
7571     }
7572 
7573     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7574     PetscInt incoo_o;
7575     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7576     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7577     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7578     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7579     ncoo = ncoo_d + ncoo_oown + ncoo2;
7580     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7581     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7582     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7583     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7584     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7585     PetscCall(PetscFree2(coo_i, coo_j));
7586     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7587     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7588     coo_i = coo_i2;
7589     coo_j = coo_j2;
7590   } else { /* no offproc values insertion */
7591     ncoo = ncoo_d;
7592     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7593 
7594     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7595     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7596     PetscCall(PetscSFSetUp(mmdata->sf));
7597   }
7598   mmdata->hasoffproc = hasoffproc;
7599 
7600   /* gather (i,j) of nonzeros inserted locally */
7601   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7602     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7603     PetscInt       *coi  = coo_i + ncoo_d;
7604     PetscInt       *coj  = coo_j + ncoo_d;
7605     const PetscInt *jj   = mm->j;
7606     const PetscInt *ii   = mm->i;
7607     const PetscInt *cmap = cmapa[cp];
7608     const PetscInt *rmap = rmapa[cp];
7609     const PetscInt  mr   = mp[cp]->rmap->n;
7610     const PetscInt  rs   = C->rmap->rstart;
7611     const PetscInt  re   = C->rmap->rend;
7612     const PetscInt  cs   = C->cmap->rstart;
7613 
7614     if (mptmp[cp]) continue;
7615     if (rmapt[cp] == 1) { /* consecutive rows */
7616       /* fill coo_i */
7617       for (i = 0; i < mr; i++) {
7618         const PetscInt gr = i + rs;
7619         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7620       }
7621       /* fill coo_j */
7622       if (!cmapt[cp]) { /* type-0, already global */
7623         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7624       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7625         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7626       } else {                                            /* type-2, local to global for sparse columns */
7627         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7628       }
7629       ncoo_d += mm->nz;
7630     } else if (rmapt[cp] == 2) { /* sparse rows */
7631       for (i = 0; i < mr; i++) {
7632         const PetscInt *jj = mm->j + ii[i];
7633         const PetscInt  gr = rmap[i];
7634         const PetscInt  nz = ii[i + 1] - ii[i];
7635         if (gr >= rs && gr < re) { /* local rows */
7636           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7637           if (!cmapt[cp]) { /* type-0, already global */
7638             for (j = 0; j < nz; j++) *coj++ = jj[j];
7639           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7640             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7641           } else { /* type-2, local to global for sparse columns */
7642             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7643           }
7644           ncoo_d += nz;
7645         }
7646       }
7647     }
7648   }
7649   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7650   PetscCall(ISDestroy(&glob));
7651   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7652   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7653   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7654   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7655 
7656   /* preallocate with COO data */
7657   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7658   PetscCall(PetscFree2(coo_i, coo_j));
7659   PetscFunctionReturn(PETSC_SUCCESS);
7660 }
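/*
  Hedged usage sketch (caller-side code, not part of this file): the symbolic routine above is not
  called directly; it is reached through the generic MatProduct interface. A and P are hypothetical
  MPIAIJ (or device AIJ) matrices supplied by the caller.

    Mat C;
    PetscCall(MatProductCreate(A, P, NULL, &C));
    PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
    PetscCall(MatProductSetFromOptions(C)); // may select MatProductSymbolic_MPIAIJBACKEND
    PetscCall(MatProductSymbolic(C));
    PetscCall(MatProductNumeric(C));        // then driven by MatProductNumeric_MPIAIJBACKEND
    PetscCall(MatDestroy(&C));
*/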
7661 
7662 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7663 {
7664   Mat_Product *product = mat->product;
7665 #if defined(PETSC_HAVE_DEVICE)
7666   PetscBool match  = PETSC_FALSE;
7667   PetscBool usecpu = PETSC_FALSE;
7668 #else
7669   PetscBool match = PETSC_TRUE;
7670 #endif
7671 
7672   PetscFunctionBegin;
7673   MatCheckProduct(mat, 1);
7674 #if defined(PETSC_HAVE_DEVICE)
7675   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7676   if (match) { /* we can always fallback to the CPU if requested */
7677     switch (product->type) {
7678     case MATPRODUCT_AB:
7679       if (product->api_user) {
7680         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7681         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7682         PetscOptionsEnd();
7683       } else {
7684         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7685         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7686         PetscOptionsEnd();
7687       }
7688       break;
7689     case MATPRODUCT_AtB:
7690       if (product->api_user) {
7691         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7692         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7693         PetscOptionsEnd();
7694       } else {
7695         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7696         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7697         PetscOptionsEnd();
7698       }
7699       break;
7700     case MATPRODUCT_PtAP:
7701       if (product->api_user) {
7702         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7703         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7704         PetscOptionsEnd();
7705       } else {
7706         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7707         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7708         PetscOptionsEnd();
7709       }
7710       break;
7711     default:
7712       break;
7713     }
7714     match = (PetscBool)!usecpu;
7715   }
7716 #endif
7717   if (match) {
7718     switch (product->type) {
7719     case MATPRODUCT_AB:
7720     case MATPRODUCT_AtB:
7721     case MATPRODUCT_PtAP:
7722       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7723       break;
7724     default:
7725       break;
7726     }
7727   }
7728   /* fallback to MPIAIJ ops */
7729   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7730   PetscFunctionReturn(PETSC_SUCCESS);
7731 }
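/*
  Hedged example (assumes the option is set before the product is configured): the CPU fallback
  handled above can be requested programmatically instead of on the command line, e.g. the
  equivalent of -matptap_backend_cpu for the MatPtAP() API path:

    PetscCall(PetscOptionsSetValue(NULL, "-matptap_backend_cpu", "1"));
*/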
7732 
7733 /*
7734    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7735 
7736    n - the number of block indices in cc[]
7737    cc - the block indices (must be large enough to contain the indices)
7738 */
7739 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7740 {
7741   PetscInt        cnt = -1, nidx, j;
7742   const PetscInt *idx;
7743 
7744   PetscFunctionBegin;
7745   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7746   if (nidx) {
7747     cnt     = 0;
7748     cc[cnt] = idx[0] / bs;
7749     for (j = 1; j < nidx; j++) {
7750       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7751     }
7752   }
7753   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7754   *n = cnt + 1;
7755   PetscFunctionReturn(PETSC_SUCCESS);
7756 }
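/*
  Worked example (illustrative): with bs = 3 and a row whose column indices are {0, 1, 2, 6, 7, 8},
  MatCollapseRow() sets *n = 2 and cc = {0, 2}, i.e. the row touches block columns 0 and 2.
*/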
7757 
7758 /*
7759     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7760 
7761     ncollapsed - the number of block indices
7762     collapsed - the block indices (must be large enough to contain the indices)
7763 */
7764 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7765 {
7766   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7767 
7768   PetscFunctionBegin;
7769   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7770   for (i = start + 1; i < start + bs; i++) {
7771     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7772     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7773     cprevtmp = cprev;
7774     cprev    = merged;
7775     merged   = cprevtmp;
7776   }
7777   *ncollapsed = nprev;
7778   if (collapsed) *collapsed = cprev;
7779   PetscFunctionReturn(PETSC_SUCCESS);
7780 }
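/*
  Worked example (illustrative): with bs = 2, if row `start` collapses to block columns {0, 3} and
  row `start + 1` collapses to {1, 3}, then *ncollapsed = 3 and, if requested, *collapsed points to
  the merged set {0, 1, 3} produced by PetscMergeIntArray().
*/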
7781 
7782 /*
7783  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7784 
7785  Input Parameters:
7786  + Amat - matrix
7787  . symmetrize - make the result symmetric
7788  - scale - scale with diagonal
7789 
7790  Output Parameter:
7791  . a_Gmat - output scalar graph >= 0
7792 
7793 */
7794 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7795 {
7796   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7797   MPI_Comm  comm;
7798   Mat       Gmat;
7799   PetscBool ismpiaij, isseqaij;
7800   Mat       a, b, c;
7801   MatType   jtype;
7802 
7803   PetscFunctionBegin;
7804   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7805   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7806   PetscCall(MatGetSize(Amat, &MM, &NN));
7807   PetscCall(MatGetBlockSize(Amat, &bs));
7808   nloc = (Iend - Istart) / bs;
7809 
7810   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7811   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7812   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7813 
7814   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7815   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, so that each class can provide a fast
7816      implementation */
7817   if (bs > 1) {
7818     PetscCall(MatGetType(Amat, &jtype));
7819     PetscCall(MatCreate(comm, &Gmat));
7820     PetscCall(MatSetType(Gmat, jtype));
7821     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7822     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7823     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7824       PetscInt  *d_nnz, *o_nnz;
7825       MatScalar *aa, val, *AA;
7826       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7827 
7828       if (isseqaij) {
7829         a = Amat;
7830         b = NULL;
7831       } else {
7832         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7833         a             = d->A;
7834         b             = d->B;
7835       }
7836       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7837       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7838       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7839         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7840         const PetscInt *cols1, *cols2;
7841 
7842         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7843           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7844           nnz[brow / bs] = nc2 / bs;
7845           if (nc2 % bs) ok = 0;
7846           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7847           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7848             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7849             if (nc1 != nc2) ok = 0;
7850             else {
7851               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7852                 if (cols1[jj] != cols2[jj]) ok = 0;
7853                 if (cols1[jj] % bs != jj % bs) ok = 0;
7854               }
7855             }
7856             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7857           }
7858           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7859           if (!ok) {
7860             PetscCall(PetscFree2(d_nnz, o_nnz));
7861             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7862             goto old_bs;
7863           }
7864         }
7865       }
7866       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7867       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7868       PetscCall(PetscFree2(d_nnz, o_nnz));
7869       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7870       // diag
7871       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7872         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7873 
7874         ai = aseq->i;
7875         n  = ai[brow + 1] - ai[brow];
7876         aj = aseq->j + ai[brow];
7877         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7878           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7879           val        = 0;
7880           if (index_size == 0) {
7881             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7882               aa = aseq->a + ai[brow + ii] + k;
7883               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7884                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7885               }
7886             }
7887           } else {                                            // use (index,index) value if provided
7888             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7889               PetscInt ii = index[iii];
7890               aa          = aseq->a + ai[brow + ii] + k;
7891               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7892                 PetscInt jj = index[jjj];
7893                 val += PetscAbs(PetscRealPart(aa[jj]));
7894               }
7895             }
7896           }
7897           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7898           AA[k / bs] = val;
7899         }
7900         grow = Istart / bs + brow / bs;
7901         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7902       }
7903       // off-diag
7904       if (ismpiaij) {
7905         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7906         const PetscScalar *vals;
7907         const PetscInt    *cols, *garray = aij->garray;
7908 
7909         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7910         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7911           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7912           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7913             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7914             AA[k / bs] = 0;
7915             AJ[cidx]   = garray[cols[k]] / bs;
7916           }
7917           nc = ncols / bs;
7918           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7919           if (index_size == 0) {
7920             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7921               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7922               for (PetscInt k = 0; k < ncols; k += bs) {
7923                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7924                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7925                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7926                 }
7927               }
7928               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7929             }
7930           } else {                                            // use (index,index) value if provided
7931             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7932               PetscInt ii = index[iii];
7933               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7934               for (PetscInt k = 0; k < ncols; k += bs) {
7935                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7936                   PetscInt jj = index[jjj];
7937                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7938                 }
7939               }
7940               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7941             }
7942           }
7943           grow = Istart / bs + brow / bs;
7944           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7945         }
7946       }
7947       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7948       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7949       PetscCall(PetscFree2(AA, AJ));
7950     } else {
7951       const PetscScalar *vals;
7952       const PetscInt    *idx;
7953       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7954     old_bs:
7955       /*
7956        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7957        */
7958       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7959       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7960       if (isseqaij) {
7961         PetscInt max_d_nnz;
7962 
7963         /*
7964          Determine exact preallocation count for (sequential) scalar matrix
7965          */
7966         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7967         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7968         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7969         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7970         PetscCall(PetscFree3(w0, w1, w2));
7971       } else if (ismpiaij) {
7972         Mat             Daij, Oaij;
7973         const PetscInt *garray;
7974         PetscInt        max_d_nnz;
7975 
7976         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7977         /*
7978          Determine exact preallocation count for diagonal block portion of scalar matrix
7979          */
7980         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7981         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7982         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7983         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7984         PetscCall(PetscFree3(w0, w1, w2));
7985         /*
7986          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7987          */
7988         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7989           o_nnz[jj] = 0;
7990           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7991             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7992             o_nnz[jj] += ncols;
7993             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7994           }
7995           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7996         }
7997       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7998       /* get scalar copy (norms) of matrix */
7999       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
8000       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
8001       PetscCall(PetscFree2(d_nnz, o_nnz));
8002       for (Ii = Istart; Ii < Iend; Ii++) {
8003         PetscInt dest_row = Ii / bs;
8004 
8005         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
8006         for (jj = 0; jj < ncols; jj++) {
8007           PetscInt    dest_col = idx[jj] / bs;
8008           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
8009 
8010           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
8011         }
8012         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
8013       }
8014       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8015       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8016     }
8017   } else {
8018     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8019     else {
8020       Gmat = Amat;
8021       PetscCall(PetscObjectReference((PetscObject)Gmat));
8022     }
8023     if (isseqaij) {
8024       a = Gmat;
8025       b = NULL;
8026     } else {
8027       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8028       a             = d->A;
8029       b             = d->B;
8030     }
8031     if (filter >= 0 || scale) {
8032       /* take absolute value of each entry */
8033       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8034         MatInfo      info;
8035         PetscScalar *avals;
8036 
8037         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8038         PetscCall(MatSeqAIJGetArray(c, &avals));
8039         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8040         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8041       }
8042     }
8043   }
8044   if (symmetrize) {
8045     PetscBool isset, issym;
8046 
8047     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8048     if (!isset || !issym) {
8049       Mat matTrans;
8050 
8051       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8052       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8053       PetscCall(MatDestroy(&matTrans));
8054     }
8055     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8056   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8057   if (scale) {
8058     /* scale Gmat so that all diagonal values are +1 or -1 */
8059     Vec diag;
8060 
8061     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8062     PetscCall(MatGetDiagonal(Gmat, diag));
8063     PetscCall(VecReciprocal(diag));
8064     PetscCall(VecSqrtAbs(diag));
8065     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8066     PetscCall(VecDestroy(&diag));
8067   }
8068   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8069   if (filter >= 0) {
8070     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8071     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8072   }
8073   *a_Gmat = Gmat;
8074   PetscFunctionReturn(PETSC_SUCCESS);
8075 }
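/*
  Hedged usage sketch (illustrative of how an aggregation-type preconditioner might drive the
  routine above; A is a hypothetical blocked (MPI)AIJ matrix). A negative filter keeps all entries
  and index_size = 0 uses every row/column of each block:

    Mat G;
    PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, -1.0, 0, NULL, &G));
    // ... use G as a scalar adjacency/strength graph ...
    PetscCall(MatDestroy(&G));
*/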
8076 
8077 /*
8078     Special version for direct calls from Fortran
8079 */
8080 
8081 /* Change these macros so they can be used in a void function */
8082 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8083 #undef PetscCall
8084 #define PetscCall(...) \
8085   do { \
8086     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8087     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8088       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8089       return; \
8090     } \
8091   } while (0)
8092 
8093 #undef SETERRQ
8094 #define SETERRQ(comm, ierr, ...) \
8095   do { \
8096     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8097     return; \
8098   } while (0)
8099 
8100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8101   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8103   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8104 #else
8105 #endif
8106 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8107 {
8108   Mat         mat = *mmat;
8109   PetscInt    m = *mm, n = *mn;
8110   InsertMode  addv = *maddv;
8111   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8112   PetscScalar value;
8113 
8114   MatCheckPreallocated(mat, 1);
8115   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8116   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8117   {
8118     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8119     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8120     PetscBool roworiented = aij->roworiented;
8121 
8122     /* Some Variables required in the macro */
8123     Mat         A     = aij->A;
8124     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8125     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8126     MatScalar  *aa;
8127     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8128     Mat         B                 = aij->B;
8129     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8130     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8131     MatScalar  *ba;
8132     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8133      * cannot use "#if defined" inside a macro. */
8134     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8135 
8136     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8137     PetscInt   nonew = a->nonew;
8138     MatScalar *ap1, *ap2;
8139 
8140     PetscFunctionBegin;
8141     PetscCall(MatSeqAIJGetArray(A, &aa));
8142     PetscCall(MatSeqAIJGetArray(B, &ba));
8143     for (i = 0; i < m; i++) {
8144       if (im[i] < 0) continue;
8145       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8146       if (im[i] >= rstart && im[i] < rend) {
8147         row      = im[i] - rstart;
8148         lastcol1 = -1;
8149         rp1      = aj + ai[row];
8150         ap1      = aa + ai[row];
8151         rmax1    = aimax[row];
8152         nrow1    = ailen[row];
8153         low1     = 0;
8154         high1    = nrow1;
8155         lastcol2 = -1;
8156         rp2      = bj + bi[row];
8157         ap2      = ba + bi[row];
8158         rmax2    = bimax[row];
8159         nrow2    = bilen[row];
8160         low2     = 0;
8161         high2    = nrow2;
8162 
8163         for (j = 0; j < n; j++) {
8164           if (roworiented) value = v[i * n + j];
8165           else value = v[i + j * m];
8166           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8167           if (in[j] >= cstart && in[j] < cend) {
8168             col = in[j] - cstart;
8169             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8170           } else if (in[j] < 0) continue;
8171           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8172             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8173           } else {
8174             if (mat->was_assembled) {
8175               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8176 #if defined(PETSC_USE_CTABLE)
8177               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8178               col--;
8179 #else
8180               col = aij->colmap[in[j]] - 1;
8181 #endif
8182               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8183                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8184                 col = in[j];
8185                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8186                 B        = aij->B;
8187                 b        = (Mat_SeqAIJ *)B->data;
8188                 bimax    = b->imax;
8189                 bi       = b->i;
8190                 bilen    = b->ilen;
8191                 bj       = b->j;
8192                 rp2      = bj + bi[row];
8193                 ap2      = ba + bi[row];
8194                 rmax2    = bimax[row];
8195                 nrow2    = bilen[row];
8196                 low2     = 0;
8197                 high2    = nrow2;
8198                 bm       = aij->B->rmap->n;
8199                 ba       = b->a;
8200                 inserted = PETSC_FALSE;
8201               }
8202             } else col = in[j];
8203             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8204           }
8205         }
8206       } else if (!aij->donotstash) {
8207         if (roworiented) {
8208           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8209         } else {
8210           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8211         }
8212       }
8213     }
8214     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8215     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8216   }
8217   PetscFunctionReturnVoid();
8218 }
8219 
8220 /* Undefining these here since they were redefined from their original definition above! No
8221  * other PETSc functions should be defined past this point, as it is impossible to recover the
8222  * original definitions */
8223 #undef PetscCall
8224 #undef SETERRQ
8225