xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d1c799ffc2c2dd0945dfd53da7d3f7c32cb9db4c)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because those are what are used to determine
43     the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and the type also
   automatically switches over to use inodes when enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/
154 
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204 {
205   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
206   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
207   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
208   const PetscInt  *ia, *ib;
209   const MatScalar *aa, *bb, *aav, *bav;
210   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
211   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
212 
213   PetscFunctionBegin;
214   *keptrows = NULL;
215 
216   ia = a->i;
217   ib = b->i;
218   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
219   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
220   for (i = 0; i < m; i++) {
221     na = ia[i + 1] - ia[i];
222     nb = ib[i + 1] - ib[i];
223     if (!na && !nb) {
224       cnt++;
225       goto ok1;
226     }
227     aa = aav + ia[i];
228     for (j = 0; j < na; j++) {
229       if (aa[j] != 0.0) goto ok1;
230     }
231     bb = PetscSafePointerPlusOffset(bav, ib[i]);
232     for (j = 0; j < nb; j++) {
233       if (bb[j] != 0.0) goto ok1;
234     }
235     cnt++;
236   ok1:;
237   }
238   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239   if (!n0rows) {
240     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
241     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
242     PetscFunctionReturn(PETSC_SUCCESS);
243   }
244   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
245   cnt = 0;
246   for (i = 0; i < m; i++) {
247     na = ia[i + 1] - ia[i];
248     nb = ib[i + 1] - ib[i];
249     if (!na && !nb) continue;
250     aa = aav + ia[i];
251     for (j = 0; j < na; j++) {
252       if (aa[j] != 0.0) {
253         rows[cnt++] = rstart + i;
254         goto ok2;
255       }
256     }
257     bb = PetscSafePointerPlusOffset(bav, ib[i]);
258     for (j = 0; j < nb; j++) {
259       if (bb[j] != 0.0) {
260         rows[cnt++] = rstart + i;
261         goto ok2;
262       }
263     }
264   ok2:;
265   }
266   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
267   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
268   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302 {
303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304   PetscInt           i, m, n, *garray = aij->garray;
305   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
306   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
307   PetscReal         *work;
308   const PetscScalar *dummy;
309 
310   PetscFunctionBegin;
311   PetscCall(MatGetSize(A, &m, &n));
312   PetscCall(PetscCalloc1(n, &work));
313   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
314   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
315   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
316   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
317   if (type == NORM_2) {
318     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
319     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
320   } else if (type == NORM_1) {
321     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
322     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
323   } else if (type == NORM_INFINITY) {
324     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
325     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
327     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
328     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
329   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
330     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
331     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
332   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
333   if (type == NORM_INFINITY) {
334     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
335   } else {
336     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
337   }
338   PetscCall(PetscFree(work));
339   if (type == NORM_2) {
340     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
341   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
342     for (i = 0; i < n; i++) reductions[i] /= m;
343   }
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
346 
347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
348 {
349   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
350   IS              sis, gis;
351   const PetscInt *isis, *igis;
352   PetscInt        n, *iis, nsis, ngis, rstart, i;
353 
354   PetscFunctionBegin;
355   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
356   PetscCall(MatFindNonzeroRows(a->B, &gis));
357   PetscCall(ISGetSize(gis, &ngis));
358   PetscCall(ISGetSize(sis, &nsis));
359   PetscCall(ISGetIndices(sis, &isis));
360   PetscCall(ISGetIndices(gis, &igis));
361 
362   PetscCall(PetscMalloc1(ngis + nsis, &iis));
363   PetscCall(PetscArraycpy(iis, igis, ngis));
364   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
365   n = ngis + nsis;
366   PetscCall(PetscSortRemoveDupsInt(&n, iis));
367   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
368   for (i = 0; i < n; i++) iis[i] += rstart;
369   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
370 
371   PetscCall(ISRestoreIndices(sis, &isis));
372   PetscCall(ISRestoreIndices(gis, &igis));
373   PetscCall(ISDestroy(&sis));
374   PetscCall(ISDestroy(&gis));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*
379   Local utility routine that creates a mapping from the global column
380 number to the local number in the off-diagonal part of the local
381 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
382 a slightly higher hash table cost; without it it is not scalable (each processor
383 has an order N integer array but is fast to access.
384 */
385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
386 {
387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
388   PetscInt    n   = aij->B->cmap->n, i;
389 
390   PetscFunctionBegin;
391   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
392 #if defined(PETSC_USE_CTABLE)
393   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
394   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
395 #else
396   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
397   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
398 #endif
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
401 
/*
  MatSetValues_SeqAIJ_A_Private - Sets or adds one value in a row of the A block.

  Arguments:
    row, col   - local row and local column of the entry
    value      - value to insert or add
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - row/column reported in the error message when a new nonzero is not allowed

  The macro works on variables of the expanding function: rp1/ap1 (column indices and
  values of the current row), nrow1 (entries in use), rmax1, the search window low1/high1
  with lastcol1 (column of the previous call, used to keep the window from the last
  search), the scratch integers t, _i and N, the flags ignorezeroentries and nonew, and
  A, a, am, aa, ai, aj, aimax, ailen for reallocation and bookkeeping.

  Steps: narrow the window by bisection until at most 5 candidates remain, scan them
  linearly; on a hit update the value and finish. Otherwise the entry is new: it is
  silently skipped for an ignored off-diagonal zero or when nonew == 1, rejected with an
  error when nonew == -1, and else inserted in sorted position (growing the storage if
  needed). ailen[row] is refreshed in every case.

  All arguments are parenthesized where they take part in an expression so that callers
  may pass expressions safely; note that col and value are still evaluated several times.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if ((col) <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = (col); \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > (col)) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > (col)) break; \
      if (rp1[_i] == (col)) { \
        if ((addv) == ADD_VALUES) { \
          ap1[_i] += (value); \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = (value); \
        goto a_noinsert; \
      } \
    } \
    if ((value) == 0.0 && ignorezeroentries && (row) != (col)) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", (orow), (ocol)); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = (col); \
    ap1[_i] = (value); \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
446 
/*
  MatSetValues_SeqAIJ_B_Private - Sets or adds one value in a row of the B block.

  Arguments:
    row, col   - local row and column of the entry in B's current column numbering
    value      - value to insert or add
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - row/column reported in the error message when a new nonzero is not allowed

  The macro works on variables of the expanding function: rp2/ap2 (column indices and
  values of the current row), nrow2 (entries in use), rmax2, the search window low2/high2
  with lastcol2 (column of the previous call), the scratch integers t, _i and N, the flags
  ignorezeroentries and nonew, and B, b, bm, ba, bi, bj, bimax, bilen for reallocation and
  bookkeeping.

  The algorithm is the same as in MatSetValues_SeqAIJ_A_Private(): bisection down to at
  most 5 candidates, linear scan, update on a hit, otherwise skip / error / sorted insert
  depending on ignorezeroentries and nonew. Unlike the A variant, an ignored zero is
  skipped here regardless of whether row equals col. bilen[row] is refreshed in every case.

  All arguments are parenthesized where they take part in an expression so that callers
  may pass expressions safely; note that col and value are still evaluated several times.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if ((col) <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = (col); \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > (col)) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > (col)) break; \
      if (rp2[_i] == (col)) { \
        if ((addv) == ADD_VALUES) { \
          ap2[_i] += (value); \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = (value); \
        goto b_noinsert; \
      } \
    } \
    if ((value) == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", (orow), (ocol)); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = (col); \
    ap2[_i] = (value); \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
490 
491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
492 {
493   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
494   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
495   PetscInt     l, *garray                         = mat->garray, diag;
496   PetscScalar *aa, *ba;
497 
498   PetscFunctionBegin;
499   /* code only works for square matrices A */
500 
501   /* find size of row to the left of the diagonal part */
502   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
503   row = row - diag;
504   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
505     if (garray[b->j[b->i[row] + l]] > diag) break;
506   }
507   if (l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512 
513   /* diagonal part */
514   if (a->i[row + 1] - a->i[row]) {
515     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
516     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
517     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
518   }
519 
520   /* right of diagonal part */
521   if (b->i[row + 1] - b->i[row] - l) {
522     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
523     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
524     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
525   }
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
530 {
531   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
532   PetscScalar value = 0.0;
533   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
534   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
535   PetscBool   roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat         A     = aij->A;
539   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
540   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
541   PetscBool   ignorezeroentries = a->ignorezeroentries;
542   Mat         B                 = aij->B;
543   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
544   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
545   MatScalar  *aa, *ba;
546   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
547   PetscInt    nonew;
548   MatScalar  *ap1, *ap2;
549 
550   PetscFunctionBegin;
551   PetscCall(MatSeqAIJGetArray(A, &aa));
552   PetscCall(MatSeqAIJGetArray(B, &ba));
553   for (i = 0; i < m; i++) {
554     if (im[i] < 0) continue;
555     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
560       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
567       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j = 0; j < n; j++) {
574         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
575         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
580         } else if (in[j] < 0) {
581           continue;
582         } else {
583           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
584           if (mat->was_assembled) {
585             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
586 #if defined(PETSC_USE_CTABLE)
587             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
593               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
594               col = in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ *)B->data;
598               bimax = b->imax;
599               bi    = b->i;
600               bilen = b->ilen;
601               bj    = b->j;
602               ba    = b->a;
603               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
604               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
612               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
613                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
614               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
615             }
616           } else col = in[j];
617           nonew = b->nonew;
618           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
619         }
620       }
621     } else {
622       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
623       if (!aij->donotstash) {
624         mat->assembled = PETSC_FALSE;
625         if (roworiented) {
626           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
627         } else {
628           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
629         }
630       }
631     }
632   }
633   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
634   PetscCall(MatSeqAIJRestoreArray(B, &ba));
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
637 
638 /*
639     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
640     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
641     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
642 */
643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
644 {
645   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
646   Mat         A      = aij->A; /* diagonal part of the matrix */
647   Mat         B      = aij->B; /* off-diagonal part of the matrix */
648   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
649   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
650   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
651   PetscInt   *ailen = a->ilen, *aj = a->j;
652   PetscInt   *bilen = b->ilen, *bj = b->j;
653   PetscInt    am          = aij->A->rmap->n, j;
654   PetscInt    diag_so_far = 0, dnz;
655   PetscInt    offd_so_far = 0, onz;
656 
657   PetscFunctionBegin;
658   /* Iterate over all rows of the matrix */
659   for (j = 0; j < am; j++) {
660     dnz = onz = 0;
661     /*  Iterate over all non-zero columns of the current row */
662     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
663       /* If column is in the diagonal */
664       if (mat_j[col] >= cstart && mat_j[col] < cend) {
665         aj[diag_so_far++] = mat_j[col] - cstart;
666         dnz++;
667       } else { /* off-diagonal entries */
668         bj[offd_so_far++] = mat_j[col];
669         onz++;
670       }
671     }
672     ailen[j] = dnz;
673     bilen[j] = onz;
674   }
675   PetscFunctionReturn(PETSC_SUCCESS);
676 }
677 
678 /*
679     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
680     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
681     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
682     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
683     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
684 */
685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
686 {
687   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
688   Mat          A    = aij->A; /* diagonal part of the matrix */
689   Mat          B    = aij->B; /* off-diagonal part of the matrix */
690   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
691   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
692   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
693   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
694   PetscInt    *ailen = a->ilen, *aj = a->j;
695   PetscInt    *bilen = b->ilen, *bj = b->j;
696   PetscInt     am          = aij->A->rmap->n, j;
697   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
698   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
699   PetscScalar *aa = a->a, *ba = b->a;
700 
701   PetscFunctionBegin;
702   /* Iterate over all rows of the matrix */
703   for (j = 0; j < am; j++) {
704     dnz_row = onz_row = 0;
705     rowstart_offd     = full_offd_i[j];
706     rowstart_diag     = full_diag_i[j];
707     /*  Iterate over all non-zero columns of the current row */
708     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
709       /* If column is in the diagonal */
710       if (mat_j[col] >= cstart && mat_j[col] < cend) {
711         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
712         aa[rowstart_diag + dnz_row] = mat_a[col];
713         dnz_row++;
714       } else { /* off-diagonal entries */
715         bj[rowstart_offd + onz_row] = mat_j[col];
716         ba[rowstart_offd + onz_row] = mat_a[col];
717         onz_row++;
718       }
719     }
720     ailen[j] = dnz_row;
721     bilen[j] = onz_row;
722   }
723   PetscFunctionReturn(PETSC_SUCCESS);
724 }
725 
726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
727 {
728   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
729   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
730   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
731 
732   PetscFunctionBegin;
733   for (i = 0; i < m; i++) {
734     if (idxm[i] < 0) continue; /* negative row */
735     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
736     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
737     row = idxm[i] - rstart;
738     for (j = 0; j < n; j++) {
739       if (idxn[j] < 0) continue; /* negative column */
740       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
741       if (idxn[j] >= cstart && idxn[j] < cend) {
742         col = idxn[j] - cstart;
743         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
744       } else {
745         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
746 #if defined(PETSC_USE_CTABLE)
747         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
748         col--;
749 #else
750         col = aij->colmap[idxn[j]] - 1;
751 #endif
752         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
753         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
754       }
755     }
756   }
757   PetscFunctionReturn(PETSC_SUCCESS);
758 }
759 
760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
761 {
762   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
763   PetscInt    nstash, reallocs;
764 
765   PetscFunctionBegin;
766   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
767 
768   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
769   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
770   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
771   PetscFunctionReturn(PETSC_SUCCESS);
772 }
773 
774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
775 {
776   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
777   PetscMPIInt  n;
778   PetscInt     i, j, rstart, ncols, flg;
779   PetscInt    *row, *col;
780   PetscBool    other_disassembled;
781   PetscScalar *val;
782 
783   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
784 
785   PetscFunctionBegin;
786   if (!aij->donotstash && !mat->nooffprocentries) {
787     while (1) {
788       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
789       if (!flg) break;
790 
791       for (i = 0; i < n;) {
792         /* Now identify the consecutive vals belonging to the same row */
793         for (j = i, rstart = row[j]; j < n; j++) {
794           if (row[j] != rstart) break;
795         }
796         if (j < n) ncols = j - i;
797         else ncols = n - i;
798         /* Now assemble all these values with a single function call */
799         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
800         i = j;
801       }
802     }
803     PetscCall(MatStashScatterEnd_Private(&mat->stash));
804   }
805 #if defined(PETSC_HAVE_DEVICE)
806   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
807   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
808   if (mat->boundtocpu) {
809     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
810     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
811   }
812 #endif
813   PetscCall(MatAssemblyBegin(aij->A, mode));
814   PetscCall(MatAssemblyEnd(aij->A, mode));
815 
816   /* determine if any processor has disassembled, if so we must
817      also disassemble ourself, in order that we may reassemble. */
818   /*
819      if nonzero structure of submatrix B cannot change then we know that
820      no processor disassembled thus we can skip this stuff
821   */
822   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
823     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
824     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
825       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
826     }
827   }
828   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
829   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
830 #if defined(PETSC_HAVE_DEVICE)
831   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
832 #endif
833   PetscCall(MatAssemblyBegin(aij->B, mode));
834   PetscCall(MatAssemblyEnd(aij->B, mode));
835 
836   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
837 
838   aij->rowvalues = NULL;
839 
840   PetscCall(VecDestroy(&aij->diag));
841 
842   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
843   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
844     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
845     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
846   }
847 #if defined(PETSC_HAVE_DEVICE)
848   mat->offloadmask = PETSC_OFFLOAD_BOTH;
849 #endif
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
854 {
855   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
856 
857   PetscFunctionBegin;
858   PetscCall(MatZeroEntries(l->A));
859   PetscCall(MatZeroEntries(l->B));
860   PetscFunctionReturn(PETSC_SUCCESS);
861 }
862 
863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
864 {
865   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
866   PetscInt   *lrows;
867   PetscInt    r, len;
868   PetscBool   cong;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
873   PetscCall(MatHasCongruentLayouts(A, &cong));
874   /* fix right-hand side if needed */
875   if (x && b) {
876     const PetscScalar *xx;
877     PetscScalar       *bb;
878 
879     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
880     PetscCall(VecGetArrayRead(x, &xx));
881     PetscCall(VecGetArray(b, &bb));
882     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
883     PetscCall(VecRestoreArrayRead(x, &xx));
884     PetscCall(VecRestoreArray(b, &bb));
885   }
886 
887   if (diag != 0.0 && cong) {
888     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
889     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
890   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
891     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
892     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
893     PetscInt    nnwA, nnwB;
894     PetscBool   nnzA, nnzB;
895 
896     nnwA = aijA->nonew;
897     nnwB = aijB->nonew;
898     nnzA = aijA->keepnonzeropattern;
899     nnzB = aijB->keepnonzeropattern;
900     if (!nnzA) {
901       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
902       aijA->nonew = 0;
903     }
904     if (!nnzB) {
905       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
906       aijB->nonew = 0;
907     }
908     /* Must zero here before the next loop */
909     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
910     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
911     for (r = 0; r < len; ++r) {
912       const PetscInt row = lrows[r] + A->rmap->rstart;
913       if (row >= A->cmap->N) continue;
914       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
915     }
916     aijA->nonew = nnwA;
917     aijB->nonew = nnwB;
918   } else {
919     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
920     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
921   }
922   PetscCall(PetscFree(lrows));
923   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
924   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
928     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
929     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
930   }
931   PetscFunctionReturn(PETSC_SUCCESS);
932 }
933 
934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
935 {
936   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
937   PetscInt           n = A->rmap->n;
938   PetscInt           i, j, r, m, len = 0;
939   PetscInt          *lrows, *owners = A->rmap->range;
940   PetscMPIInt        p = 0;
941   PetscSFNode       *rrows;
942   PetscSF            sf;
943   const PetscScalar *xx;
944   PetscScalar       *bb, *mask, *aij_a;
945   Vec                xmask, lmask;
946   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
947   const PetscInt    *aj, *ii, *ridx;
948   PetscScalar       *aa;
949 
950   PetscFunctionBegin;
951   /* Create SF where leaves are input rows and roots are owned rows */
952   PetscCall(PetscMalloc1(n, &lrows));
953   for (r = 0; r < n; ++r) lrows[r] = -1;
954   PetscCall(PetscMalloc1(N, &rrows));
955   for (r = 0; r < N; ++r) {
956     const PetscInt idx = rows[r];
957     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
958     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
959       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
960     }
961     rrows[r].rank  = p;
962     rrows[r].index = rows[r] - owners[p];
963   }
964   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
965   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
966   /* Collect flags for rows to be zeroed */
967   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
968   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
969   PetscCall(PetscSFDestroy(&sf));
970   /* Compress and put in row numbers */
971   for (r = 0; r < n; ++r)
972     if (lrows[r] >= 0) lrows[len++] = r;
973   /* zero diagonal part of matrix */
974   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
975   /* handle off-diagonal part of matrix */
976   PetscCall(MatCreateVecs(A, &xmask, NULL));
977   PetscCall(VecDuplicate(l->lvec, &lmask));
978   PetscCall(VecGetArray(xmask, &bb));
979   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
980   PetscCall(VecRestoreArray(xmask, &bb));
981   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
982   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
983   PetscCall(VecDestroy(&xmask));
984   if (x && b) { /* this code is buggy when the row and column layout don't match */
985     PetscBool cong;
986 
987     PetscCall(MatHasCongruentLayouts(A, &cong));
988     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
989     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
990     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
991     PetscCall(VecGetArrayRead(l->lvec, &xx));
992     PetscCall(VecGetArray(b, &bb));
993   }
994   PetscCall(VecGetArray(lmask, &mask));
995   /* remove zeroed rows of off-diagonal matrix */
996   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
997   ii = aij->i;
998   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
999   /* loop over all elements of off process part of matrix zeroing removed columns*/
1000   if (aij->compressedrow.use) {
1001     m    = aij->compressedrow.nrows;
1002     ii   = aij->compressedrow.i;
1003     ridx = aij->compressedrow.rindex;
1004     for (i = 0; i < m; i++) {
1005       n  = ii[i + 1] - ii[i];
1006       aj = aij->j + ii[i];
1007       aa = aij_a + ii[i];
1008 
1009       for (j = 0; j < n; j++) {
1010         if (PetscAbsScalar(mask[*aj])) {
1011           if (b) bb[*ridx] -= *aa * xx[*aj];
1012           *aa = 0.0;
1013         }
1014         aa++;
1015         aj++;
1016       }
1017       ridx++;
1018     }
1019   } else { /* do not use compressed row format */
1020     m = l->B->rmap->n;
1021     for (i = 0; i < m; i++) {
1022       n  = ii[i + 1] - ii[i];
1023       aj = aij->j + ii[i];
1024       aa = aij_a + ii[i];
1025       for (j = 0; j < n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           if (b) bb[i] -= *aa * xx[*aj];
1028           *aa = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033     }
1034   }
1035   if (x && b) {
1036     PetscCall(VecRestoreArray(b, &bb));
1037     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1038   }
1039   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1040   PetscCall(VecRestoreArray(lmask, &mask));
1041   PetscCall(VecDestroy(&lmask));
1042   PetscCall(PetscFree(lrows));
1043 
1044   /* only change matrix nonzero state if pattern was allowed to be changed */
1045   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1046     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1047     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1048   }
1049   PetscFunctionReturn(PETSC_SUCCESS);
1050 }
1051 
1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1055   PetscInt    nt;
1056   VecScatter  Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   PetscCall(VecGetLocalSize(xx, &nt));
1060   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscUseTypeMethod(a->A, mult, xx, yy);
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1078 {
1079   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1080   VecScatter  Mvctx = a->Mvctx;
1081 
1082   PetscFunctionBegin;
1083   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1084   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1085   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1086   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1091 {
1092   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1093 
1094   PetscFunctionBegin;
1095   /* do nondiagonal part */
1096   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1097   /* do local part */
1098   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1099   /* add partial results together */
1100   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1101   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1106 {
1107   MPI_Comm    comm;
1108   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1109   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1110   IS          Me, Notme;
1111   PetscInt    M, N, first, last, *notme, i;
1112   PetscBool   lf;
1113   PetscMPIInt size;
1114 
1115   PetscFunctionBegin;
1116   /* Easy test: symmetric diagonal block */
1117   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1118   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1119   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1120   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1121   PetscCallMPI(MPI_Comm_size(comm, &size));
1122   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1123 
1124   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1125   PetscCall(MatGetSize(Amat, &M, &N));
1126   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1127   PetscCall(PetscMalloc1(N - last + first, &notme));
1128   for (i = 0; i < first; i++) notme[i] = i;
1129   for (i = last; i < M; i++) notme[i - last + first] = i;
1130   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1131   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1132   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1133   Aoff = Aoffs[0];
1134   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1135   Boff = Boffs[0];
1136   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1137   PetscCall(MatDestroyMatrices(1, &Aoffs));
1138   PetscCall(MatDestroyMatrices(1, &Boffs));
1139   PetscCall(ISDestroy(&Me));
1140   PetscCall(ISDestroy(&Notme));
1141   PetscCall(PetscFree(notme));
1142   PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144 
1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1146 {
1147   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1148 
1149   PetscFunctionBegin;
1150   /* do nondiagonal part */
1151   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1152   /* do local part */
1153   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1154   /* add partial results together */
1155   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1157   PetscFunctionReturn(PETSC_SUCCESS);
1158 }
1159 
1160 /*
1161   This only works correctly for square matrices where the subblock A->A is the
1162    diagonal block
1163 */
1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1165 {
1166   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1167 
1168   PetscFunctionBegin;
1169   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1170   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1171   PetscCall(MatGetDiagonal(a->A, v));
1172   PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174 
1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1176 {
1177   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(MatScale(a->A, aa));
1181   PetscCall(MatScale(a->B, aa));
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184 
1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1186 {
1187   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1188   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1189   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1190   const PetscInt    *garray = aij->garray;
1191   const PetscScalar *aa, *ba;
1192   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1193   PetscInt64         nz, hnz;
1194   PetscInt          *rowlens;
1195   PetscInt          *colidxs;
1196   PetscScalar       *matvals;
1197   PetscMPIInt        rank;
1198 
1199   PetscFunctionBegin;
1200   PetscCall(PetscViewerSetUp(viewer));
1201 
1202   M  = mat->rmap->N;
1203   N  = mat->cmap->N;
1204   m  = mat->rmap->n;
1205   rs = mat->rmap->rstart;
1206   cs = mat->cmap->rstart;
1207   nz = A->nz + B->nz;
1208 
1209   /* write matrix header */
1210   header[0] = MAT_FILE_CLASSID;
1211   header[1] = M;
1212   header[2] = N;
1213   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1214   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1215   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1216   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1217 
1218   /* fill in and store row lengths  */
1219   PetscCall(PetscMalloc1(m, &rowlens));
1220   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1221   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1222   PetscCall(PetscFree(rowlens));
1223 
1224   /* fill in and store column indices */
1225   PetscCall(PetscMalloc1(nz, &colidxs));
1226   for (cnt = 0, i = 0; i < m; i++) {
1227     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1228       if (garray[B->j[jb]] > cs) break;
1229       colidxs[cnt++] = garray[B->j[jb]];
1230     }
1231     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1232     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1233   }
1234   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1235   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1236   PetscCall(PetscFree(colidxs));
1237 
1238   /* fill in and store nonzero values */
1239   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1240   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1241   PetscCall(PetscMalloc1(nz, &matvals));
1242   for (cnt = 0, i = 0; i < m; i++) {
1243     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       matvals[cnt++] = ba[jb];
1246     }
1247     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1248     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1249   }
1250   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1251   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1252   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1254   PetscCall(PetscFree(matvals));
1255 
1256   /* write block size option to the viewer's .info file */
1257   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1258   PetscFunctionReturn(PETSC_SUCCESS);
1259 }
1260 
1261 #include <petscdraw.h>
1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1263 {
1264   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1265   PetscMPIInt       rank = aij->rank, size = aij->size;
1266   PetscBool         isdraw, iascii, isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1272   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1273   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1274   if (iascii) {
1275     PetscCall(PetscViewerGetFormat(viewer, &format));
1276     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1277       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1278       PetscCall(PetscMalloc1(size, &nz));
1279       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1280       for (i = 0; i < size; i++) {
1281         nmax = PetscMax(nmax, nz[i]);
1282         nmin = PetscMin(nmin, nz[i]);
1283         navg += nz[i];
1284       }
1285       PetscCall(PetscFree(nz));
1286       navg = navg / size;
1287       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1288       PetscFunctionReturn(PETSC_SUCCESS);
1289     }
1290     PetscCall(PetscViewerGetFormat(viewer, &format));
1291     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1292       MatInfo   info;
1293       PetscInt *inodes = NULL;
1294 
1295       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1296       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1297       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1298       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1299       if (!inodes) {
1300         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1301                                                      info.memory));
1302       } else {
1303         PetscCall(
1304           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
1305       }
1306       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1307       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1308       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1309       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1310       PetscCall(PetscViewerFlush(viewer));
1311       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1312       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1313       PetscCall(VecScatterView(aij->Mvctx, viewer));
1314       PetscFunctionReturn(PETSC_SUCCESS);
1315     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1316       PetscInt inodecount, inodelimit, *inodes;
1317       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1318       if (inodes) {
1319         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1320       } else {
1321         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1322       }
1323       PetscFunctionReturn(PETSC_SUCCESS);
1324     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1325       PetscFunctionReturn(PETSC_SUCCESS);
1326     }
1327   } else if (isbinary) {
1328     if (size == 1) {
1329       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1330       PetscCall(MatView(aij->A, viewer));
1331     } else {
1332       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1333     }
1334     PetscFunctionReturn(PETSC_SUCCESS);
1335   } else if (iascii && size == 1) {
1336     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1337     PetscCall(MatView(aij->A, viewer));
1338     PetscFunctionReturn(PETSC_SUCCESS);
1339   } else if (isdraw) {
1340     PetscDraw draw;
1341     PetscBool isnull;
1342     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1343     PetscCall(PetscDrawIsNull(draw, &isnull));
1344     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1345   }
1346 
1347   { /* assemble the entire matrix onto first processor */
1348     Mat A = NULL, Av;
1349     IS  isrow, iscol;
1350 
1351     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1352     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1353     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1354     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1355     /*  The commented code uses MatCreateSubMatrices instead */
1356     /*
1357     Mat *AA, A = NULL, Av;
1358     IS  isrow,iscol;
1359 
1360     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1361     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1362     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1363     if (rank == 0) {
1364        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1365        A    = AA[0];
1366        Av   = AA[0];
1367     }
1368     PetscCall(MatDestroySubMatrices(1,&AA));
1369 */
1370     PetscCall(ISDestroy(&iscol));
1371     PetscCall(ISDestroy(&isrow));
1372     /*
1373        Everyone has to call to draw the matrix since the graphics waits are
1374        synchronized across all processors that share the PetscDraw object
1375     */
1376     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1377     if (rank == 0) {
1378       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1379       PetscCall(MatView_SeqAIJ(Av, sviewer));
1380     }
1381     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1382     PetscCall(MatDestroy(&A));
1383   }
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1388 {
1389   PetscBool iascii, isdraw, issocket, isbinary;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1395   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1396   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1401 {
1402   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1403   Vec         bb1 = NULL;
1404   PetscBool   hasop;
1405 
1406   PetscFunctionBegin;
1407   if (flag == SOR_APPLY_UPPER) {
1408     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1409     PetscFunctionReturn(PETSC_SUCCESS);
1410   }
1411 
1412   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1413 
1414   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419 
1420     while (its--) {
1421       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1423 
1424       /* update rhs: bb1 = bb - B*x */
1425       PetscCall(VecScale(mat->lvec, -1.0));
1426       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1427 
1428       /* local sweep */
1429       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1430     }
1431   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1432     if (flag & SOR_ZERO_INITIAL_GUESS) {
1433       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1434       its--;
1435     }
1436     while (its--) {
1437       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1439 
1440       /* update rhs: bb1 = bb - B*x */
1441       PetscCall(VecScale(mat->lvec, -1.0));
1442       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1443 
1444       /* local sweep */
1445       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1446     }
1447   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1448     if (flag & SOR_ZERO_INITIAL_GUESS) {
1449       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1450       its--;
1451     }
1452     while (its--) {
1453       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1455 
1456       /* update rhs: bb1 = bb - B*x */
1457       PetscCall(VecScale(mat->lvec, -1.0));
1458       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1459 
1460       /* local sweep */
1461       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1462     }
1463   } else if (flag & SOR_EISENSTAT) {
1464     Vec xx1;
1465 
1466     PetscCall(VecDuplicate(bb, &xx1));
1467     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1468 
1469     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1470     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1471     if (!mat->diag) {
1472       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1473       PetscCall(MatGetDiagonal(matin, mat->diag));
1474     }
1475     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1476     if (hasop) {
1477       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1478     } else {
1479       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1480     }
1481     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1482 
1483     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1484 
1485     /* local sweep */
1486     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1487     PetscCall(VecAXPY(xx, 1.0, xx1));
1488     PetscCall(VecDestroy(&xx1));
1489   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1490 
1491   PetscCall(VecDestroy(&bb1));
1492 
1493   matin->factorerrortype = mat->A->factorerrortype;
1494   PetscFunctionReturn(PETSC_SUCCESS);
1495 }
1496 
1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1498 {
1499   Mat             aA, aB, Aperm;
1500   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1501   PetscScalar    *aa, *ba;
1502   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1503   PetscSF         rowsf, sf;
1504   IS              parcolp = NULL;
1505   PetscBool       done;
1506 
1507   PetscFunctionBegin;
1508   PetscCall(MatGetLocalSize(A, &m, &n));
1509   PetscCall(ISGetIndices(rowp, &rwant));
1510   PetscCall(ISGetIndices(colp, &cwant));
1511   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1512 
1513   /* Invert row permutation to find out where my rows should go */
1514   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1515   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1516   PetscCall(PetscSFSetFromOptions(rowsf));
1517   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1518   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1519   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1520 
1521   /* Invert column permutation to find out where my columns should go */
1522   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1523   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1524   PetscCall(PetscSFSetFromOptions(sf));
1525   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1526   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1527   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1528   PetscCall(PetscSFDestroy(&sf));
1529 
1530   PetscCall(ISRestoreIndices(rowp, &rwant));
1531   PetscCall(ISRestoreIndices(colp, &cwant));
1532   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1533 
1534   /* Find out where my gcols should go */
1535   PetscCall(MatGetSize(aB, NULL, &ng));
1536   PetscCall(PetscMalloc1(ng, &gcdest));
1537   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1538   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1539   PetscCall(PetscSFSetFromOptions(sf));
1540   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1541   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1542   PetscCall(PetscSFDestroy(&sf));
1543 
1544   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1545   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1546   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1547   for (i = 0; i < m; i++) {
1548     PetscInt    row = rdest[i];
1549     PetscMPIInt rowner;
1550     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1551     for (j = ai[i]; j < ai[i + 1]; j++) {
1552       PetscInt    col = cdest[aj[j]];
1553       PetscMPIInt cowner;
1554       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1555       if (rowner == cowner) dnnz[i]++;
1556       else onnz[i]++;
1557     }
1558     for (j = bi[i]; j < bi[i + 1]; j++) {
1559       PetscInt    col = gcdest[bj[j]];
1560       PetscMPIInt cowner;
1561       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1562       if (rowner == cowner) dnnz[i]++;
1563       else onnz[i]++;
1564     }
1565   }
1566   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1567   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1568   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1569   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1570   PetscCall(PetscSFDestroy(&rowsf));
1571 
1572   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1573   PetscCall(MatSeqAIJGetArray(aA, &aa));
1574   PetscCall(MatSeqAIJGetArray(aB, &ba));
1575   for (i = 0; i < m; i++) {
1576     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1577     PetscInt  j0, rowlen;
1578     rowlen = ai[i + 1] - ai[i];
1579     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1580       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1581       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1582     }
1583     rowlen = bi[i + 1] - bi[i];
1584     for (j0 = j = 0; j < rowlen; j0 = j) {
1585       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1586       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1587     }
1588   }
1589   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1590   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1591   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1592   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1593   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1594   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1595   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1596   PetscCall(PetscFree3(work, rdest, cdest));
1597   PetscCall(PetscFree(gcdest));
1598   if (parcolp) PetscCall(ISDestroy(&colp));
1599   *B = Aperm;
1600   PetscFunctionReturn(PETSC_SUCCESS);
1601 }
1602 
1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1604 {
1605   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1606 
1607   PetscFunctionBegin;
1608   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1609   if (ghosts) *ghosts = aij->garray;
1610   PetscFunctionReturn(PETSC_SUCCESS);
1611 }
1612 
1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1614 {
1615   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1616   Mat            A = mat->A, B = mat->B;
1617   PetscLogDouble isend[5], irecv[5];
1618 
1619   PetscFunctionBegin;
1620   info->block_size = 1.0;
1621   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1622 
1623   isend[0] = info->nz_used;
1624   isend[1] = info->nz_allocated;
1625   isend[2] = info->nz_unneeded;
1626   isend[3] = info->memory;
1627   isend[4] = info->mallocs;
1628 
1629   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1630 
1631   isend[0] += info->nz_used;
1632   isend[1] += info->nz_allocated;
1633   isend[2] += info->nz_unneeded;
1634   isend[3] += info->memory;
1635   isend[4] += info->mallocs;
1636   if (flag == MAT_LOCAL) {
1637     info->nz_used      = isend[0];
1638     info->nz_allocated = isend[1];
1639     info->nz_unneeded  = isend[2];
1640     info->memory       = isend[3];
1641     info->mallocs      = isend[4];
1642   } else if (flag == MAT_GLOBAL_MAX) {
1643     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1644 
1645     info->nz_used      = irecv[0];
1646     info->nz_allocated = irecv[1];
1647     info->nz_unneeded  = irecv[2];
1648     info->memory       = irecv[3];
1649     info->mallocs      = irecv[4];
1650   } else if (flag == MAT_GLOBAL_SUM) {
1651     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1652 
1653     info->nz_used      = irecv[0];
1654     info->nz_allocated = irecv[1];
1655     info->nz_unneeded  = irecv[2];
1656     info->memory       = irecv[3];
1657     info->mallocs      = irecv[4];
1658   }
1659   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1660   info->fill_ratio_needed = 0;
1661   info->factor_mallocs    = 0;
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1666 {
1667   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1668 
1669   PetscFunctionBegin;
1670   switch (op) {
1671   case MAT_NEW_NONZERO_LOCATIONS:
1672   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1673   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1674   case MAT_KEEP_NONZERO_PATTERN:
1675   case MAT_NEW_NONZERO_LOCATION_ERR:
1676   case MAT_USE_INODES:
1677   case MAT_IGNORE_ZERO_ENTRIES:
1678   case MAT_FORM_EXPLICIT_TRANSPOSE:
1679     MatCheckPreallocated(A, 1);
1680     PetscCall(MatSetOption(a->A, op, flg));
1681     PetscCall(MatSetOption(a->B, op, flg));
1682     break;
1683   case MAT_ROW_ORIENTED:
1684     MatCheckPreallocated(A, 1);
1685     a->roworiented = flg;
1686 
1687     PetscCall(MatSetOption(a->A, op, flg));
1688     PetscCall(MatSetOption(a->B, op, flg));
1689     break;
1690   case MAT_IGNORE_OFF_PROC_ENTRIES:
1691     a->donotstash = flg;
1692     break;
1693   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1694   case MAT_SPD:
1695   case MAT_SYMMETRIC:
1696   case MAT_STRUCTURALLY_SYMMETRIC:
1697   case MAT_HERMITIAN:
1698   case MAT_SYMMETRY_ETERNAL:
1699   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1700   case MAT_SPD_ETERNAL:
1701     /* if the diagonal matrix is square it inherits some of the properties above */
1702     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1703     break;
1704   case MAT_SUBMAT_SINGLEIS:
1705     A->submat_singleis = flg;
1706     break;
1707   default:
1708     break;
1709   }
1710   PetscFunctionReturn(PETSC_SUCCESS);
1711 }
1712 
1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1714 {
1715   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1716   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1717   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1718   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1719   PetscInt    *cmap, *idx_p;
1720 
1721   PetscFunctionBegin;
1722   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1723   mat->getrowactive = PETSC_TRUE;
1724 
1725   if (!mat->rowvalues && (idx || v)) {
1726     /*
1727         allocate enough space to hold information from the longest row.
1728     */
1729     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1730     PetscInt    max = 1, tmp;
1731     for (i = 0; i < matin->rmap->n; i++) {
1732       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1733       if (max < tmp) max = tmp;
1734     }
1735     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1736   }
1737 
1738   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1739   lrow = row - rstart;
1740 
1741   pvA = &vworkA;
1742   pcA = &cworkA;
1743   pvB = &vworkB;
1744   pcB = &cworkB;
1745   if (!v) {
1746     pvA = NULL;
1747     pvB = NULL;
1748   }
1749   if (!idx) {
1750     pcA = NULL;
1751     if (!v) pcB = NULL;
1752   }
1753   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1754   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1755   nztot = nzA + nzB;
1756 
1757   cmap = mat->garray;
1758   if (v || idx) {
1759     if (nztot) {
1760       /* Sort by increasing column numbers, assuming A and B already sorted */
1761       PetscInt imark = -1;
1762       if (v) {
1763         *v = v_p = mat->rowvalues;
1764         for (i = 0; i < nzB; i++) {
1765           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1766           else break;
1767         }
1768         imark = i;
1769         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1770         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1771       }
1772       if (idx) {
1773         *idx = idx_p = mat->rowindices;
1774         if (imark > -1) {
1775           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1776         } else {
1777           for (i = 0; i < nzB; i++) {
1778             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1779             else break;
1780           }
1781           imark = i;
1782         }
1783         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1784         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1785       }
1786     } else {
1787       if (idx) *idx = NULL;
1788       if (v) *v = NULL;
1789     }
1790   }
1791   *nz = nztot;
1792   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1793   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1798 {
1799   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1800 
1801   PetscFunctionBegin;
1802   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1803   aij->getrowactive = PETSC_FALSE;
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1808 {
1809   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1810   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1811   PetscInt         i, j, cstart = mat->cmap->rstart;
1812   PetscReal        sum = 0.0;
1813   const MatScalar *v, *amata, *bmata;
1814 
1815   PetscFunctionBegin;
1816   if (aij->size == 1) {
1817     PetscCall(MatNorm(aij->A, type, norm));
1818   } else {
1819     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1820     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1821     if (type == NORM_FROBENIUS) {
1822       v = amata;
1823       for (i = 0; i < amat->nz; i++) {
1824         sum += PetscRealPart(PetscConj(*v) * (*v));
1825         v++;
1826       }
1827       v = bmata;
1828       for (i = 0; i < bmat->nz; i++) {
1829         sum += PetscRealPart(PetscConj(*v) * (*v));
1830         v++;
1831       }
1832       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1833       *norm = PetscSqrtReal(*norm);
1834       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1835     } else if (type == NORM_1) { /* max column norm */
1836       PetscReal *tmp;
1837       PetscInt  *jj, *garray = aij->garray;
1838       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1839       *norm = 0.0;
1840       v     = amata;
1841       jj    = amat->j;
1842       for (j = 0; j < amat->nz; j++) {
1843         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1844         v++;
1845       }
1846       v  = bmata;
1847       jj = bmat->j;
1848       for (j = 0; j < bmat->nz; j++) {
1849         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1850         v++;
1851       }
1852       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1853       for (j = 0; j < mat->cmap->N; j++) {
1854         if (tmp[j] > *norm) *norm = tmp[j];
1855       }
1856       PetscCall(PetscFree(tmp));
1857       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1858     } else if (type == NORM_INFINITY) { /* max row norm */
1859       PetscReal ntemp = 0.0;
1860       for (j = 0; j < aij->A->rmap->n; j++) {
1861         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1862         sum = 0.0;
1863         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1864           sum += PetscAbsScalar(*v);
1865           v++;
1866         }
1867         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1868         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1869           sum += PetscAbsScalar(*v);
1870           v++;
1871         }
1872         if (sum > ntemp) ntemp = sum;
1873       }
1874       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1875       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1876     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1877     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1878     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1879   }
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
1882 
1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1884 {
1885   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1886   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1887   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1888   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1889   Mat              B, A_diag, *B_diag;
1890   const MatScalar *pbv, *bv;
1891 
1892   PetscFunctionBegin;
1893   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1894   ma = A->rmap->n;
1895   na = A->cmap->n;
1896   mb = a->B->rmap->n;
1897   nb = a->B->cmap->n;
1898   ai = Aloc->i;
1899   aj = Aloc->j;
1900   bi = Bloc->i;
1901   bj = Bloc->j;
1902   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1903     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1904     PetscSFNode         *oloc;
1905     PETSC_UNUSED PetscSF sf;
1906 
1907     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1908     /* compute d_nnz for preallocation */
1909     PetscCall(PetscArrayzero(d_nnz, na));
1910     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1911     /* compute local off-diagonal contributions */
1912     PetscCall(PetscArrayzero(g_nnz, nb));
1913     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1914     /* map those to global */
1915     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1916     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1917     PetscCall(PetscSFSetFromOptions(sf));
1918     PetscCall(PetscArrayzero(o_nnz, na));
1919     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1920     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1921     PetscCall(PetscSFDestroy(&sf));
1922 
1923     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1924     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1925     PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs));
1926     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1927     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1928     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1929   } else {
1930     B = *matout;
1931     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1932   }
1933 
1934   b           = (Mat_MPIAIJ *)B->data;
1935   A_diag      = a->A;
1936   B_diag      = &b->A;
1937   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1938   A_diag_ncol = A_diag->cmap->N;
1939   B_diag_ilen = sub_B_diag->ilen;
1940   B_diag_i    = sub_B_diag->i;
1941 
1942   /* Set ilen for diagonal of B */
1943   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1944 
1945   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1946   very quickly (=without using MatSetValues), because all writes are local. */
1947   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1948   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1949 
1950   /* copy over the B part */
1951   PetscCall(PetscMalloc1(bi[mb], &cols));
1952   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1953   pbv = bv;
1954   row = A->rmap->rstart;
1955   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1956   cols_tmp = cols;
1957   for (i = 0; i < mb; i++) {
1958     ncol = bi[i + 1] - bi[i];
1959     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1960     row++;
1961     if (pbv) pbv += ncol;
1962     if (cols_tmp) cols_tmp += ncol;
1963   }
1964   PetscCall(PetscFree(cols));
1965   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1966 
1967   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1968   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1969   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1970     *matout = B;
1971   } else {
1972     PetscCall(MatHeaderMerge(A, &B));
1973   }
1974   PetscFunctionReturn(PETSC_SUCCESS);
1975 }
1976 
1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1978 {
1979   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1980   Mat         a = aij->A, b = aij->B;
1981   PetscInt    s1, s2, s3;
1982 
1983   PetscFunctionBegin;
1984   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1985   if (rr) {
1986     PetscCall(VecGetLocalSize(rr, &s1));
1987     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1988     /* Overlap communication with computation. */
1989     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1990   }
1991   if (ll) {
1992     PetscCall(VecGetLocalSize(ll, &s1));
1993     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1994     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1995   }
1996   /* scale  the diagonal block */
1997   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1998 
1999   if (rr) {
2000     /* Do a scatter end and then right scale the off-diagonal block */
2001     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2002     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2003   }
2004   PetscFunctionReturn(PETSC_SUCCESS);
2005 }
2006 
2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2008 {
2009   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2010 
2011   PetscFunctionBegin;
2012   PetscCall(MatSetUnfactored(a->A));
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2017 {
2018   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2019   Mat         a, b, c, d;
2020   PetscBool   flg;
2021 
2022   PetscFunctionBegin;
2023   a = matA->A;
2024   b = matA->B;
2025   c = matB->A;
2026   d = matB->B;
2027 
2028   PetscCall(MatEqual(a, c, &flg));
2029   if (flg) PetscCall(MatEqual(b, d, &flg));
2030   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2035 {
2036   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2037   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2038 
2039   PetscFunctionBegin;
2040   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2041   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2042     /* because of the column compression in the off-processor part of the matrix a->B,
2043        the number of columns in a->B and b->B may be different, hence we cannot call
2044        the MatCopy() directly on the two parts. If need be, we can provide a more
2045        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2046        then copying the submatrices */
2047     PetscCall(MatCopy_Basic(A, B, str));
2048   } else {
2049     PetscCall(MatCopy(a->A, b->A, str));
2050     PetscCall(MatCopy(a->B, b->B, str));
2051   }
2052   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2053   PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055 
2056 /*
2057    Computes the number of nonzeros per row needed for preallocation when X and Y
2058    have different nonzero structure.
2059 */
2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2061 {
2062   PetscInt i, j, k, nzx, nzy;
2063 
2064   PetscFunctionBegin;
2065   /* Set the number of nonzeros in the new matrix */
2066   for (i = 0; i < m; i++) {
2067     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2068     nzx    = xi[i + 1] - xi[i];
2069     nzy    = yi[i + 1] - yi[i];
2070     nnz[i] = 0;
2071     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2072       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2073       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2074       nnz[i]++;
2075     }
2076     for (; k < nzy; k++) nnz[i]++;
2077   }
2078   PetscFunctionReturn(PETSC_SUCCESS);
2079 }
2080 
2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2083 {
2084   PetscInt    m = Y->rmap->N;
2085   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2086   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2087 
2088   PetscFunctionBegin;
2089   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2090   PetscFunctionReturn(PETSC_SUCCESS);
2091 }
2092 
2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2094 {
2095   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2096 
2097   PetscFunctionBegin;
2098   if (str == SAME_NONZERO_PATTERN) {
2099     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2100     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2101   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2102     PetscCall(MatAXPY_Basic(Y, a, X, str));
2103   } else {
2104     Mat       B;
2105     PetscInt *nnz_d, *nnz_o;
2106 
2107     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2108     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2109     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2110     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2111     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2112     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2113     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2114     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2115     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2116     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2117     PetscCall(MatHeaderMerge(Y, &B));
2118     PetscCall(PetscFree(nnz_d));
2119     PetscCall(PetscFree(nnz_o));
2120   }
2121   PetscFunctionReturn(PETSC_SUCCESS);
2122 }
2123 
2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2125 
2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2127 {
2128   PetscFunctionBegin;
2129   if (PetscDefined(USE_COMPLEX)) {
2130     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2131 
2132     PetscCall(MatConjugate_SeqAIJ(aij->A));
2133     PetscCall(MatConjugate_SeqAIJ(aij->B));
2134   }
2135   PetscFunctionReturn(PETSC_SUCCESS);
2136 }
2137 
2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2139 {
2140   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2141 
2142   PetscFunctionBegin;
2143   PetscCall(MatRealPart(a->A));
2144   PetscCall(MatRealPart(a->B));
2145   PetscFunctionReturn(PETSC_SUCCESS);
2146 }
2147 
2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2149 {
2150   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2151 
2152   PetscFunctionBegin;
2153   PetscCall(MatImaginaryPart(a->A));
2154   PetscCall(MatImaginaryPart(a->B));
2155   PetscFunctionReturn(PETSC_SUCCESS);
2156 }
2157 
2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2159 {
2160   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2161   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2162   PetscScalar       *vv;
2163   Vec                vB, vA;
2164   const PetscScalar *va, *vb;
2165 
2166   PetscFunctionBegin;
2167   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2168   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2169 
2170   PetscCall(VecGetArrayRead(vA, &va));
2171   if (idx) {
2172     for (i = 0; i < m; i++) {
2173       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2174     }
2175   }
2176 
2177   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2178   PetscCall(PetscMalloc1(m, &idxb));
2179   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2180 
2181   PetscCall(VecGetArrayWrite(v, &vv));
2182   PetscCall(VecGetArrayRead(vB, &vb));
2183   for (i = 0; i < m; i++) {
2184     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2185       vv[i] = vb[i];
2186       if (idx) idx[i] = a->garray[idxb[i]];
2187     } else {
2188       vv[i] = va[i];
2189       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2190     }
2191   }
2192   PetscCall(VecRestoreArrayWrite(v, &vv));
2193   PetscCall(VecRestoreArrayRead(vA, &va));
2194   PetscCall(VecRestoreArrayRead(vB, &vb));
2195   PetscCall(PetscFree(idxb));
2196   PetscCall(VecDestroy(&vA));
2197   PetscCall(VecDestroy(&vB));
2198   PetscFunctionReturn(PETSC_SUCCESS);
2199 }
2200 
2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2202 {
2203   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2204   Vec         vB, vA;
2205 
2206   PetscFunctionBegin;
2207   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2208   PetscCall(MatGetRowSumAbs(a->A, vA));
2209   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2210   PetscCall(MatGetRowSumAbs(a->B, vB));
2211   PetscCall(VecAXPY(vA, 1.0, vB));
2212   PetscCall(VecDestroy(&vB));
2213   PetscCall(VecCopy(vA, v));
2214   PetscCall(VecDestroy(&vA));
2215   PetscFunctionReturn(PETSC_SUCCESS);
2216 }
2217 
2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2219 {
2220   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2221   PetscInt           m = A->rmap->n, n = A->cmap->n;
2222   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2223   PetscInt          *cmap = mat->garray;
2224   PetscInt          *diagIdx, *offdiagIdx;
2225   Vec                diagV, offdiagV;
2226   PetscScalar       *a, *diagA, *offdiagA;
2227   const PetscScalar *ba, *bav;
2228   PetscInt           r, j, col, ncols, *bi, *bj;
2229   Mat                B = mat->B;
2230   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2231 
2232   PetscFunctionBegin;
2233   /* When a process holds entire A and other processes have no entry */
2234   if (A->cmap->N == n) {
2235     PetscCall(VecGetArrayWrite(v, &diagA));
2236     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2237     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2238     PetscCall(VecDestroy(&diagV));
2239     PetscCall(VecRestoreArrayWrite(v, &diagA));
2240     PetscFunctionReturn(PETSC_SUCCESS);
2241   } else if (n == 0) {
2242     if (m) {
2243       PetscCall(VecGetArrayWrite(v, &a));
2244       for (r = 0; r < m; r++) {
2245         a[r] = 0.0;
2246         if (idx) idx[r] = -1;
2247       }
2248       PetscCall(VecRestoreArrayWrite(v, &a));
2249     }
2250     PetscFunctionReturn(PETSC_SUCCESS);
2251   }
2252 
2253   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2254   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2255   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2256   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2257 
2258   /* Get offdiagIdx[] for implicit 0.0 */
2259   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2260   ba = bav;
2261   bi = b->i;
2262   bj = b->j;
2263   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2264   for (r = 0; r < m; r++) {
2265     ncols = bi[r + 1] - bi[r];
2266     if (ncols == A->cmap->N - n) { /* Brow is dense */
2267       offdiagA[r]   = *ba;
2268       offdiagIdx[r] = cmap[0];
2269     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2270       offdiagA[r] = 0.0;
2271 
2272       /* Find first hole in the cmap */
2273       for (j = 0; j < ncols; j++) {
2274         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2275         if (col > j && j < cstart) {
2276           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2277           break;
2278         } else if (col > j + n && j >= cstart) {
2279           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2280           break;
2281         }
2282       }
2283       if (j == ncols && ncols < A->cmap->N - n) {
2284         /* a hole is outside compressed Bcols */
2285         if (ncols == 0) {
2286           if (cstart) {
2287             offdiagIdx[r] = 0;
2288           } else offdiagIdx[r] = cend;
2289         } else { /* ncols > 0 */
2290           offdiagIdx[r] = cmap[ncols - 1] + 1;
2291           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2292         }
2293       }
2294     }
2295 
2296     for (j = 0; j < ncols; j++) {
2297       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2298         offdiagA[r]   = *ba;
2299         offdiagIdx[r] = cmap[*bj];
2300       }
2301       ba++;
2302       bj++;
2303     }
2304   }
2305 
2306   PetscCall(VecGetArrayWrite(v, &a));
2307   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2308   for (r = 0; r < m; ++r) {
2309     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2310       a[r] = diagA[r];
2311       if (idx) idx[r] = cstart + diagIdx[r];
2312     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2313       a[r] = diagA[r];
2314       if (idx) {
2315         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2316           idx[r] = cstart + diagIdx[r];
2317         } else idx[r] = offdiagIdx[r];
2318       }
2319     } else {
2320       a[r] = offdiagA[r];
2321       if (idx) idx[r] = offdiagIdx[r];
2322     }
2323   }
2324   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2325   PetscCall(VecRestoreArrayWrite(v, &a));
2326   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2327   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2328   PetscCall(VecDestroy(&diagV));
2329   PetscCall(VecDestroy(&offdiagV));
2330   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2331   PetscFunctionReturn(PETSC_SUCCESS);
2332 }
2333 
2334 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2335 {
2336   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2337   PetscInt           m = A->rmap->n, n = A->cmap->n;
2338   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2339   PetscInt          *cmap = mat->garray;
2340   PetscInt          *diagIdx, *offdiagIdx;
2341   Vec                diagV, offdiagV;
2342   PetscScalar       *a, *diagA, *offdiagA;
2343   const PetscScalar *ba, *bav;
2344   PetscInt           r, j, col, ncols, *bi, *bj;
2345   Mat                B = mat->B;
2346   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2347 
2348   PetscFunctionBegin;
2349   /* When a process holds entire A and other processes have no entry */
2350   if (A->cmap->N == n) {
2351     PetscCall(VecGetArrayWrite(v, &diagA));
2352     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2353     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2354     PetscCall(VecDestroy(&diagV));
2355     PetscCall(VecRestoreArrayWrite(v, &diagA));
2356     PetscFunctionReturn(PETSC_SUCCESS);
2357   } else if (n == 0) {
2358     if (m) {
2359       PetscCall(VecGetArrayWrite(v, &a));
2360       for (r = 0; r < m; r++) {
2361         a[r] = PETSC_MAX_REAL;
2362         if (idx) idx[r] = -1;
2363       }
2364       PetscCall(VecRestoreArrayWrite(v, &a));
2365     }
2366     PetscFunctionReturn(PETSC_SUCCESS);
2367   }
2368 
2369   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2370   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2371   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2372   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2373 
2374   /* Get offdiagIdx[] for implicit 0.0 */
2375   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2376   ba = bav;
2377   bi = b->i;
2378   bj = b->j;
2379   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2380   for (r = 0; r < m; r++) {
2381     ncols = bi[r + 1] - bi[r];
2382     if (ncols == A->cmap->N - n) { /* Brow is dense */
2383       offdiagA[r]   = *ba;
2384       offdiagIdx[r] = cmap[0];
2385     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2386       offdiagA[r] = 0.0;
2387 
2388       /* Find first hole in the cmap */
2389       for (j = 0; j < ncols; j++) {
2390         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2391         if (col > j && j < cstart) {
2392           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2393           break;
2394         } else if (col > j + n && j >= cstart) {
2395           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2396           break;
2397         }
2398       }
2399       if (j == ncols && ncols < A->cmap->N - n) {
2400         /* a hole is outside compressed Bcols */
2401         if (ncols == 0) {
2402           if (cstart) {
2403             offdiagIdx[r] = 0;
2404           } else offdiagIdx[r] = cend;
2405         } else { /* ncols > 0 */
2406           offdiagIdx[r] = cmap[ncols - 1] + 1;
2407           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2408         }
2409       }
2410     }
2411 
2412     for (j = 0; j < ncols; j++) {
2413       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2414         offdiagA[r]   = *ba;
2415         offdiagIdx[r] = cmap[*bj];
2416       }
2417       ba++;
2418       bj++;
2419     }
2420   }
2421 
2422   PetscCall(VecGetArrayWrite(v, &a));
2423   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2424   for (r = 0; r < m; ++r) {
2425     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2426       a[r] = diagA[r];
2427       if (idx) idx[r] = cstart + diagIdx[r];
2428     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2429       a[r] = diagA[r];
2430       if (idx) {
2431         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2432           idx[r] = cstart + diagIdx[r];
2433         } else idx[r] = offdiagIdx[r];
2434       }
2435     } else {
2436       a[r] = offdiagA[r];
2437       if (idx) idx[r] = offdiagIdx[r];
2438     }
2439   }
2440   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2441   PetscCall(VecRestoreArrayWrite(v, &a));
2442   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2443   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2444   PetscCall(VecDestroy(&diagV));
2445   PetscCall(VecDestroy(&offdiagV));
2446   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2447   PetscFunctionReturn(PETSC_SUCCESS);
2448 }
2449 
2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2451 {
2452   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2453   PetscInt           m = A->rmap->n, n = A->cmap->n;
2454   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2455   PetscInt          *cmap = mat->garray;
2456   PetscInt          *diagIdx, *offdiagIdx;
2457   Vec                diagV, offdiagV;
2458   PetscScalar       *a, *diagA, *offdiagA;
2459   const PetscScalar *ba, *bav;
2460   PetscInt           r, j, col, ncols, *bi, *bj;
2461   Mat                B = mat->B;
2462   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2463 
2464   PetscFunctionBegin;
2465   /* When a process holds entire A and other processes have no entry */
2466   if (A->cmap->N == n) {
2467     PetscCall(VecGetArrayWrite(v, &diagA));
2468     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2469     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2470     PetscCall(VecDestroy(&diagV));
2471     PetscCall(VecRestoreArrayWrite(v, &diagA));
2472     PetscFunctionReturn(PETSC_SUCCESS);
2473   } else if (n == 0) {
2474     if (m) {
2475       PetscCall(VecGetArrayWrite(v, &a));
2476       for (r = 0; r < m; r++) {
2477         a[r] = PETSC_MIN_REAL;
2478         if (idx) idx[r] = -1;
2479       }
2480       PetscCall(VecRestoreArrayWrite(v, &a));
2481     }
2482     PetscFunctionReturn(PETSC_SUCCESS);
2483   }
2484 
2485   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2486   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2487   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2488   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2489 
2490   /* Get offdiagIdx[] for implicit 0.0 */
2491   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2492   ba = bav;
2493   bi = b->i;
2494   bj = b->j;
2495   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2496   for (r = 0; r < m; r++) {
2497     ncols = bi[r + 1] - bi[r];
2498     if (ncols == A->cmap->N - n) { /* Brow is dense */
2499       offdiagA[r]   = *ba;
2500       offdiagIdx[r] = cmap[0];
2501     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2502       offdiagA[r] = 0.0;
2503 
2504       /* Find first hole in the cmap */
2505       for (j = 0; j < ncols; j++) {
2506         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2507         if (col > j && j < cstart) {
2508           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2509           break;
2510         } else if (col > j + n && j >= cstart) {
2511           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2512           break;
2513         }
2514       }
2515       if (j == ncols && ncols < A->cmap->N - n) {
2516         /* a hole is outside compressed Bcols */
2517         if (ncols == 0) {
2518           if (cstart) {
2519             offdiagIdx[r] = 0;
2520           } else offdiagIdx[r] = cend;
2521         } else { /* ncols > 0 */
2522           offdiagIdx[r] = cmap[ncols - 1] + 1;
2523           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2524         }
2525       }
2526     }
2527 
2528     for (j = 0; j < ncols; j++) {
2529       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2530         offdiagA[r]   = *ba;
2531         offdiagIdx[r] = cmap[*bj];
2532       }
2533       ba++;
2534       bj++;
2535     }
2536   }
2537 
2538   PetscCall(VecGetArrayWrite(v, &a));
2539   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2540   for (r = 0; r < m; ++r) {
2541     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2542       a[r] = diagA[r];
2543       if (idx) idx[r] = cstart + diagIdx[r];
2544     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2545       a[r] = diagA[r];
2546       if (idx) {
2547         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2548           idx[r] = cstart + diagIdx[r];
2549         } else idx[r] = offdiagIdx[r];
2550       }
2551     } else {
2552       a[r] = offdiagA[r];
2553       if (idx) idx[r] = offdiagIdx[r];
2554     }
2555   }
2556   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2557   PetscCall(VecRestoreArrayWrite(v, &a));
2558   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2559   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2560   PetscCall(VecDestroy(&diagV));
2561   PetscCall(VecDestroy(&offdiagV));
2562   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2567 {
2568   Mat *dummy;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2572   *newmat = *dummy;
2573   PetscCall(PetscFree(dummy));
2574   PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576 
2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2578 {
2579   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2580 
2581   PetscFunctionBegin;
2582   PetscCall(MatInvertBlockDiagonal(a->A, values));
2583   A->factorerrortype = a->A->factorerrortype;
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2588 {
2589   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2590 
2591   PetscFunctionBegin;
2592   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2593   PetscCall(MatSetRandom(aij->A, rctx));
2594   if (x->assembled) {
2595     PetscCall(MatSetRandom(aij->B, rctx));
2596   } else {
2597     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2598   }
2599   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2600   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2601   PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603 
2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2605 {
2606   PetscFunctionBegin;
2607   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2608   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2609   PetscFunctionReturn(PETSC_SUCCESS);
2610 }
2611 
2612 /*@
2613   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2614 
2615   Not Collective
2616 
2617   Input Parameter:
2618 . A - the matrix
2619 
2620   Output Parameter:
2621 . nz - the number of nonzeros
2622 
2623   Level: advanced
2624 
2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2626 @*/
2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2628 {
2629   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2630   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2631   PetscBool   isaij;
2632 
2633   PetscFunctionBegin;
2634   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2635   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2636   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2637   PetscFunctionReturn(PETSC_SUCCESS);
2638 }
2639 
2640 /*@
2641   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2642 
2643   Collective
2644 
2645   Input Parameters:
2646 + A  - the matrix
2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2648 
2649   Level: advanced
2650 
2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2652 @*/
2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2654 {
2655   PetscFunctionBegin;
2656   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2657   PetscFunctionReturn(PETSC_SUCCESS);
2658 }
2659 
2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject)
2661 {
2662   PetscBool sc = PETSC_FALSE, flg;
2663 
2664   PetscFunctionBegin;
2665   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2666   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2667   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2668   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2669   PetscOptionsHeadEnd();
2670   PetscFunctionReturn(PETSC_SUCCESS);
2671 }
2672 
2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2674 {
2675   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2676   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2677 
2678   PetscFunctionBegin;
2679   if (!Y->preallocated) {
2680     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2681   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2682     PetscInt nonew = aij->nonew;
2683     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2684     aij->nonew = nonew;
2685   }
2686   PetscCall(MatShift_Basic(Y, a));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2696   PetscCall(MatMissingDiagonal(a->A, missing, d));
2697   if (d) {
2698     PetscInt rstart;
2699     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2700     *d += rstart;
2701   }
2702   PetscFunctionReturn(PETSC_SUCCESS);
2703 }
2704 
2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2706 {
2707   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2708 
2709   PetscFunctionBegin;
2710   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2711   PetscFunctionReturn(PETSC_SUCCESS);
2712 }
2713 
2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2715 {
2716   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2717 
2718   PetscFunctionBegin;
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2720   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
/*
  MatOps_Values - positional initializer for a `struct _MatOps` function-pointer table.

  Entries are matched to fields purely by position, so the order below must follow the field order of
  `struct _MatOps` (declared outside this view). The numeric markers in the inline comments label the slot
  index of the entry that follows them. A NULL entry means this table supplies no function for that slot.
  Where the table is attached to a Mat is not visible in this part of the file.
*/
2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2725                                        MatGetRow_MPIAIJ,
2726                                        MatRestoreRow_MPIAIJ,
2727                                        MatMult_MPIAIJ,
2728                                        /* 4*/ MatMultAdd_MPIAIJ,
2729                                        MatMultTranspose_MPIAIJ,
2730                                        MatMultTransposeAdd_MPIAIJ,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                        /*10*/ NULL,
2735                                        NULL,
2736                                        NULL,
2737                                        MatSOR_MPIAIJ,
2738                                        MatTranspose_MPIAIJ,
2739                                        /*15*/ MatGetInfo_MPIAIJ,
2740                                        MatEqual_MPIAIJ,
2741                                        MatGetDiagonal_MPIAIJ,
2742                                        MatDiagonalScale_MPIAIJ,
2743                                        MatNorm_MPIAIJ,
2744                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2745                                        MatAssemblyEnd_MPIAIJ,
2746                                        MatSetOption_MPIAIJ,
2747                                        MatZeroEntries_MPIAIJ,
2748                                        /*24*/ MatZeroRows_MPIAIJ,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                        /*29*/ MatSetUp_MPI_Hash,
2754                                        NULL,
2755                                        NULL,
2756                                        MatGetDiagonalBlock_MPIAIJ,
2757                                        NULL,
2758                                        /*34*/ MatDuplicate_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        /*39*/ MatAXPY_MPIAIJ,
2764                                        MatCreateSubMatrices_MPIAIJ,
2765                                        MatIncreaseOverlap_MPIAIJ,
2766                                        MatGetValues_MPIAIJ,
2767                                        MatCopy_MPIAIJ,
2768                                        /*44*/ MatGetRowMax_MPIAIJ,
2769                                        MatScale_MPIAIJ,
2770                                        MatShift_MPIAIJ,
2771                                        MatDiagonalSet_MPIAIJ,
2772                                        MatZeroRowsColumns_MPIAIJ,
2773                                        /*49*/ MatSetRandom_MPIAIJ,
2774                                        MatGetRowIJ_MPIAIJ,
2775                                        MatRestoreRowIJ_MPIAIJ,
2776                                        NULL,
2777                                        NULL,
2778                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2779                                        NULL,
2780                                        MatSetUnfactored_MPIAIJ,
2781                                        MatPermute_MPIAIJ,
2782                                        NULL,
2783                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2784                                        MatDestroy_MPIAIJ,
2785                                        MatView_MPIAIJ,
2786                                        NULL,
2787                                        NULL,
2788                                        /*64*/ NULL,
2789                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2790                                        NULL,
2791                                        NULL,
2792                                        NULL,
2793                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2794                                        MatGetRowMinAbs_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                        NULL,
2799                                        /*75*/ MatFDColoringApply_AIJ,
2800                                        MatSetFromOptions_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        MatFindZeroDiagonals_MPIAIJ,
2804                                        /*80*/ NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        /*83*/ MatLoad_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        NULL,
2812                                        NULL,
2813                                        /*89*/ NULL,
2814                                        NULL,
2815                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2816                                        NULL,
2817                                        NULL,
2818                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2819                                        NULL,
2820                                        NULL,
2821                                        NULL,
2822                                        MatBindToCPU_MPIAIJ,
2823                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        MatConjugate_MPIAIJ,
2827                                        NULL,
2828                                        /*104*/ MatSetValuesRow_MPIAIJ,
2829                                        MatRealPart_MPIAIJ,
2830                                        MatImaginaryPart_MPIAIJ,
2831                                        NULL,
2832                                        NULL,
2833                                        /*109*/ NULL,
2834                                        NULL,
2835                                        MatGetRowMin_MPIAIJ,
2836                                        NULL,
2837                                        MatMissingDiagonal_MPIAIJ,
2838                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2839                                        NULL,
2840                                        MatGetGhosts_MPIAIJ,
2841                                        NULL,
2842                                        NULL,
2843                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2844                                        NULL,
2845                                        NULL,
2846                                        NULL,
2847                                        MatGetMultiProcBlock_MPIAIJ,
2848                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2849                                        MatGetColumnReductions_MPIAIJ,
2850                                        MatInvertBlockDiagonal_MPIAIJ,
2851                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2852                                        MatCreateSubMatricesMPI_MPIAIJ,
2853                                        /*129*/ NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2857                                        NULL,
2858                                        /*134*/ NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        NULL,
2862                                        NULL,
2863                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2864                                        NULL,
2865                                        NULL,
2866                                        MatFDColoringSetUp_MPIXAIJ,
2867                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2868                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2869                                        /*145*/ NULL,
2870                                        NULL,
2871                                        NULL,
2872                                        MatCreateGraph_Simple_AIJ,
2873                                        NULL,
2874                                        /*150*/ NULL,
2875                                        MatEliminateZeros_MPIAIJ,
2876                                        MatGetRowSumAbs_MPIAIJ,
2877                                        NULL,
2878                                        NULL,
2879                                        /*155*/ NULL,
2880                                        MatCopyHashToXAIJ_MPI_Hash};
2881 
/* MatStoreValues() for MATMPIAIJ: forwards the request to the two sequential blocks held in the Mat_MPIAIJ data, A first and then B. */
2882 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2883 {
2884   Mat_MPIAIJ *mpiaij    = (Mat_MPIAIJ *)mat->data;
2885   Mat         blocks[2] = {mpiaij->A, mpiaij->B}; /* processed in this order */
2886 
2887   PetscFunctionBegin;
2888   for (PetscInt k = 0; k < 2; k++) PetscCall(MatStoreValues(blocks[k]));
2889   PetscFunctionReturn(PETSC_SUCCESS);
2890 }
2891 
/* MatRetrieveValues() for MATMPIAIJ: forwards the request to the two sequential blocks held in the Mat_MPIAIJ data, A first and then B. */
2892 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2893 {
2894   Mat_MPIAIJ *mpiaij    = (Mat_MPIAIJ *)mat->data;
2895   Mat         blocks[2] = {mpiaij->A, mpiaij->B}; /* processed in this order */
2896 
2897   PetscFunctionBegin;
2898   for (PetscInt k = 0; k < 2; k++) PetscCall(MatRetrieveValues(blocks[k]));
2899   PetscFunctionReturn(PETSC_SUCCESS);
2900 }
2901 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - preallocation for a MATMPIAIJ matrix: destroys and re-creates the two
  sequential blocks b->A and b->B of B as MATSEQAIJ matrices and preallocates them.

  Input Parameters:
+ B     - the matrix; B->data is interpreted as Mat_MPIAIJ
. d_nz  - passed, together with d_nnz, to MatSeqAIJSetPreallocation() for b->A
. d_nnz - see d_nz
. o_nz  - passed, together with o_nnz, to MatSeqAIJSetPreallocation() for b->B
- o_nnz - see o_nz

  Notes:
  Any existing colmap, garray, lvec and Mvctx of B are destroyed.
  On success B is flagged as preallocated and as not (and never having been) assembled.
*/
2902 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2903 {
2904   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2905   PetscMPIInt size;
2906 
2907   PetscFunctionBegin;
  /* leave hash-based mode: copy the function table kept in b->cops back into B->ops */
2908   if (B->hash_active) {
2909     B->ops[0]      = b->cops;
2910     B->hash_active = PETSC_FALSE;
2911   }
2912   PetscCall(PetscLayoutSetUp(B->rmap));
2913   PetscCall(PetscLayoutSetUp(B->cmap));
2914 
  /* discard the column map, garray, local vector and scatter left from any earlier setup */
2915 #if defined(PETSC_USE_CTABLE)
2916   PetscCall(PetscHMapIDestroy(&b->colmap));
2917 #else
2918   PetscCall(PetscFree(b->colmap));
2919 #endif
2920   PetscCall(PetscFree(b->garray));
2921   PetscCall(VecDestroy(&b->lvec));
2922   PetscCall(VecScatterDestroy(&b->Mvctx));
2923 
2924   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2925 
  /* NOTE(review): MatSeqXAIJGetOptions_Private()/MatSeqXAIJRestoreOptions_Private() are macros defined elsewhere;
     presumably they carry options of the old block over to its replacement - confirm against their definition */
  /* new b->B: B->rmap->n rows; B->cmap->N columns when running on more than one rank, zero columns on a single rank */
2926   MatSeqXAIJGetOptions_Private(b->B);
2927   PetscCall(MatDestroy(&b->B));
2928   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2929   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2930   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2931   PetscCall(MatSetType(b->B, MATSEQAIJ));
2932   MatSeqXAIJRestoreOptions_Private(b->B);
2933 
  /* new b->A: B->rmap->n rows by B->cmap->n columns */
2934   MatSeqXAIJGetOptions_Private(b->A);
2935   PetscCall(MatDestroy(&b->A));
2936   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2937   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2938   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2939   PetscCall(MatSetType(b->A, MATSEQAIJ));
2940   MatSeqXAIJRestoreOptions_Private(b->A);
2941 
  /* d_* arguments preallocate b->A, o_* arguments preallocate b->B */
2942   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2943   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2944   B->preallocated  = PETSC_TRUE;
2945   B->was_assembled = PETSC_FALSE;
2946   B->assembled     = PETSC_FALSE;
2947   PetscFunctionReturn(PETSC_SUCCESS);
2948 }
2949 
/*
  MatResetPreallocation_MPIAIJ - resets the preallocation of both sequential blocks of B and, when at least one
  rank reports that memory was actually reset, puts B back into the preallocated-but-unassembled state.

  Input Parameter:
. B - the matrix; B->data is interpreted as Mat_MPIAIJ

  Notes:
  Errors with PETSC_ERR_SUP when B->insertmode is not NOT_SET_VALUES.
  Returns immediately when B->num_ass is zero, and also when no rank reports a reset.
*/
2950 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2951 {
2952   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2953   PetscBool   ondiagreset, offdiagreset, memoryreset;
2954 
2955   PetscFunctionBegin;
2956   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2957   PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()");
2958   if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS);
2959 
  /* reset each block; the second argument reports whether that block reset anything */
2960   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset));
2961   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset));
2962   memoryreset = (PetscBool)(ondiagreset || offdiagreset);
  /* logical OR over the communicator of B, so every rank takes the same branch below */
2963   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B)));
2964   if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS);
2965 
2966   PetscCall(PetscLayoutSetUp(B->rmap));
2967   PetscCall(PetscLayoutSetUp(B->cmap));
2968   PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled");
  /* MatDisAssemble_MPIAIJ() is defined elsewhere in this file; the scatter context is dropped right after it */
2969   PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2970   PetscCall(VecScatterDestroy(&b->Mvctx));
2971 
2972   B->preallocated  = PETSC_TRUE;
2973   B->was_assembled = PETSC_FALSE;
2974   B->assembled     = PETSC_FALSE;
2975   /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
2976   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2977   PetscFunctionReturn(PETSC_SUCCESS);
2978 }
2979 
/*
  MatDuplicate_MPIAIJ - creates a new matrix with the same communicator, sizes, block sizes and type as matin
  and duplicates the MPIAIJ-specific data into it.

  Input Parameters:
+ matin    - the matrix to duplicate
- cpvalues - duplication option, passed unchanged to MatDuplicate() on the two sequential blocks

  Output Parameter:
. newmat - the new matrix; set to NULL on entry and assigned only after all steps succeeded

  Notes:
  The row and column layouts of matin are referenced, not copied.
  If matin is still in hash-based mode (matin->hash_active) the new matrix is only set up with MatSetUp();
  none of colmap, garray, lvec, Mvctx, A or B is copied in that case.
*/
2980 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2981 {
2982   Mat         mat;
2983   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2984 
2985   PetscFunctionBegin;
2986   *newmat = NULL;
2987   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2988   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2989   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2990   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2991   a = (Mat_MPIAIJ *)mat->data;
2992 
2993   mat->factortype = matin->factortype;
2994   mat->assembled  = matin->assembled;
2995   mat->insertmode = NOT_SET_VALUES;
2996 
  /* scalar settings are copied; the row work arrays start out empty and no get-row is active on the copy */
2997   a->size         = oldmat->size;
2998   a->rank         = oldmat->rank;
2999   a->donotstash   = oldmat->donotstash;
3000   a->roworiented  = oldmat->roworiented;
3001   a->rowindices   = NULL;
3002   a->rowvalues    = NULL;
3003   a->getrowactive = PETSC_FALSE;
3004 
3005   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3006   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3007   if (matin->hash_active) {
3008     PetscCall(MatSetUp(mat));
3009   } else {
3010     mat->preallocated = matin->preallocated;
    /* colmap: duplicated as a hash map with PETSC_USE_CTABLE, otherwise copied as an array of mat->cmap->N entries */
3011     if (oldmat->colmap) {
3012 #if defined(PETSC_USE_CTABLE)
3013       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3014 #else
3015       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3016       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3017 #endif
3018     } else a->colmap = NULL;
    /* garray: len = number of columns of oldmat->B entries are copied; the allocation holds one extra entry */
3019     if (oldmat->garray) {
3020       PetscInt len;
3021       len = oldmat->B->cmap->n;
3022       PetscCall(PetscMalloc1(len + 1, &a->garray));
3023       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3024     } else a->garray = NULL;
3025 
3026     /* It may happen MatDuplicate is called with a non-assembled matrix
3027       In fact, MatDuplicate only requires the matrix to be preallocated
3028       This may happen inside a DMCreateMatrix_Shell */
3029     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    /* the scatter context is shared with matin: same object, one more reference */
3030     if (oldmat->Mvctx) {
3031       a->Mvctx = oldmat->Mvctx;
3032       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3033     }
3034     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3035     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3036   }
  /* duplicate the list of functions composed with matin onto the new matrix */
3037   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3038   *newmat = mat;
3039   PetscFunctionReturn(PETSC_SUCCESS);
3040 }
3041 
/*
  MatLoad_MPIAIJ - loads newMat from a viewer, dispatching on the viewer type: binary viewers go to
  MatLoad_MPIAIJ_Binary(), HDF5 viewers go to MatLoad_AIJ_HDF5() (only when built with PETSC_HAVE_HDF5).
  Any other viewer type, or an HDF5 viewer in a build without HDF5, raises PETSC_ERR_SUP.
*/
3042 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3043 {
3044   PetscBool from_binary, from_hdf5;
3045 
3046   PetscFunctionBegin;
3047   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3048   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3049   /* setting up the viewer forces a binary viewer to load its .info file if it has not done so yet */
3050   PetscCall(PetscViewerSetUp(viewer));
3051   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &from_binary));
3052   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &from_hdf5));
3053   /* reject every viewer type that is neither binary nor HDF5 before dispatching */
3054   PetscCheck(from_binary || from_hdf5, PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3055   if (from_binary) {
3056     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3057   } else { /* HDF5 viewer */
3058 #if defined(PETSC_HAVE_HDF5)
3059     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3060 #else
3061     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3062 #endif
3063   }
3064   PetscFunctionReturn(PETSC_SUCCESS);
3065 }
3066 
/*
  MatLoad_MPIAIJ_Binary - reads a matrix from a binary viewer and stores it in mat through
  MatMPIAIJSetPreallocationCSR().

  Input Parameters:
+ mat    - the matrix to fill; global sizes that are still negative are taken from the file
- viewer - the viewer to read from

  Notes:
  Read order: a header of four PetscInt (class id, global rows M, global columns N, nonzero count nz),
  then the row lengths, then the column indices, then the values.
  Errors with PETSC_ERR_FILE_UNEXPECTED if the class id is not MAT_FILE_CLASSID, if M, N or nz is negative,
  if the sizes in the file differ from those of mat, or if the row lengths do not sum to nz.
*/
3067 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3068 {
3069   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3070   PetscInt    *rowidxs, *colidxs;
3071   PetscScalar *matvals;
3072 
3073   PetscFunctionBegin;
3074   PetscCall(PetscViewerSetUp(viewer));
3075 
3076   /* read in matrix header */
3077   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3078   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3079   M  = header[1];
3080   N  = header[2];
3081   nz = header[3];
3082   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3083   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3084   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3085 
3086   /* set block sizes from the viewer's .info file */
3087   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3088   /* set global sizes if not set already */
3089   if (mat->rmap->N < 0) mat->rmap->N = M;
3090   if (mat->cmap->N < 0) mat->cmap->N = N;
3091   PetscCall(PetscLayoutSetUp(mat->rmap));
3092   PetscCall(PetscLayoutSetUp(mat->cmap));
3093 
3094   /* check if the matrix sizes are correct */
3095   PetscCall(MatGetSize(mat, &rows, &cols));
3096   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3097 
3098   /* read in row lengths and build row indices */
3099   PetscCall(MatGetLocalSize(mat, &m, NULL));
3100   PetscCall(PetscMalloc1(m + 1, &rowidxs));
  /* the m local row lengths land in rowidxs[1..m]; the running sum below turns them into row offsets with rowidxs[0] = 0 */
3101   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3102   rowidxs[0] = 0;
3103   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* rowidxs[m] is the local number of nonzeros; the global consistency check is skipped when nz equals PETSC_INT_MAX */
3104   if (nz != PETSC_INT_MAX) {
3105     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3106     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3107   }
3108 
3109   /* read in column indices and matrix values */
3110   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3111   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3112   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3113   /* store matrix indices and values */
3114   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  /* the three work arrays are released here, right after the call above */
3115   PetscCall(PetscFree(rowidxs));
3116   PetscCall(PetscFree2(colidxs, matvals));
3117   PetscFunctionReturn(PETSC_SUCCESS);
3118 }
3119 
3120 /* Not scalable because of ISAllGather() unless getting all columns. */
/*
  ISGetSeqIS_Private - creates a sequential index set (on PETSC_COMM_SELF) listing all columns selected by the
  parallel index set iscol.

  Input Parameters:
+ mat   - the matrix whose column ownership range and global column count are consulted
- iscol - the parallel column index set

  Output Parameter:
. isseq - the newly created sequential index set; the caller receives the only reference to it

  Notes:
  If, on every rank, iscol is a stride index set that starts at the local column start of mat, has exactly as
  many local entries as mat has local columns, and advances by 1, then all columns are requested and the
  identity stride 0..N-1 is created without gathering any indices. The stride step is checked because a stride
  with a different step and more than one entry does not enumerate the ownership range even when its start and
  length match. In every other case ISAllGather() is used and the block size of iscol is copied to the result.
*/
3121 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3122 {
3123   IS          iscol_local;
3124   PetscBool   isstride;
3125   PetscMPIInt gisstride = 0;
3126 
3127   PetscFunctionBegin;
3128   /* check if we are grabbing all columns */
3129   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3130 
3131   if (isstride) {
3132     PetscInt start, step, len, mstart, mend;
3133     PetscCall(ISStrideGetInfo(iscol, &start, &step));
3134     PetscCall(ISGetLocalSize(iscol, &len));
3135     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mend));
    /* with at most one local entry the step is irrelevant; otherwise only step 1 walks the ownership range */
3136     if ((len <= 1 || step == 1) && mstart == start && mend - mstart == len) gisstride = 1;
3137   }
3138 
  /* MPI_MIN: the shortcut is taken only if every rank of mat's communicator detected the all-columns case */
3139   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3140   if (gisstride) {
3141     PetscInt N;
3142     PetscCall(MatGetSize(mat, NULL, &N));
3143     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3144     PetscCall(ISSetIdentity(iscol_local));
3145     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3146   } else {
3147     PetscInt cbs;
3148     PetscCall(ISGetBlockSize(iscol, &cbs));
3149     PetscCall(ISAllGather(iscol, &iscol_local));
3150     PetscCall(ISSetBlockSize(iscol_local, cbs));
3151   }
3152 
3153   *isseq = iscol_local;
3154   PetscFunctionReturn(PETSC_SUCCESS);
3155 }
3156 
3157 /*
3158  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3159  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3160 
3161  Input Parameters:
3162 +   mat - matrix
3163 .   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3164            i.e., mat->rstart <= isrow[i] < mat->rend
3165 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3166            i.e., mat->cstart <= iscol[i] < mat->cend
3167 
3168  Output Parameters:
3169 +   isrow_d - sequential row index set for retrieving mat->A
3170 .   iscol_d - sequential  column index set for retrieving mat->A
3171 .   iscol_o - sequential column index set for retrieving mat->B
3172 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`

 Notes:
   The three index sets and garray are created here and handed to the caller. garray is allocated with as many
   entries as mat->B has columns, but only the first (size of iscol_o) entries are written.
   The scatter a->Mvctx and the work vector a->lvec of mat are used, and a->lvec is overwritten.
3173  */
3174 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3175 {
3176   Vec             x, cmap;
3177   const PetscInt *is_idx;
3178   PetscScalar    *xarray, *cmaparray;
3179   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3180   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3181   Mat             B    = a->B;
3182   Vec             lvec = a->lvec, lcmap;
3183   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3184   MPI_Comm        comm;
3185   VecScatter      Mvctx = a->Mvctx;
3186 
3187   PetscFunctionBegin;
3188   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3189   PetscCall(ISGetLocalSize(iscol, &ncols));
3190 
3191   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3192   PetscCall(MatCreateVecs(mat, &x, NULL));
3193   PetscCall(VecSet(x, -1.0));
3194   PetscCall(VecDuplicate(x, &cmap));
3195   PetscCall(VecSet(cmap, -1.0));
3196 
3197   /* Get start indices */
  /* inclusive prefix sum of the local sizes of iscol; subtracting ncols gives the offset of this rank's first entry */
3198   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3199   isstart -= ncols;
3200   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3201 
3202   PetscCall(ISGetIndices(iscol, &is_idx));
3203   PetscCall(VecGetArray(x, &xarray));
3204   PetscCall(VecGetArray(cmap, &cmaparray));
3205   PetscCall(PetscMalloc1(ncols, &idx));
  /* selected columns are marked in x with their own index and in cmap with their position in iscol; all others stay -1 */
3206   for (i = 0; i < ncols; i++) {
3207     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3208     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3209     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3210   }
3211   PetscCall(VecRestoreArray(x, &xarray));
3212   PetscCall(VecRestoreArray(cmap, &cmaparray));
3213   PetscCall(ISRestoreIndices(iscol, &is_idx));
3214 
3215   /* Get iscol_d */
  /* PETSC_OWN_POINTER: idx now belongs to *iscol_d; the variable idx is re-allocated below; i is reused as scratch for the block size */
3216   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3217   PetscCall(ISGetBlockSize(iscol, &i));
3218   PetscCall(ISSetBlockSize(*iscol_d, i));
3219 
3220   /* Get isrow_d */
3221   PetscCall(ISGetLocalSize(isrow, &m));
3222   rstart = mat->rmap->rstart;
3223   PetscCall(PetscMalloc1(m, &idx));
3224   PetscCall(ISGetIndices(isrow, &is_idx));
3225   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3226   PetscCall(ISRestoreIndices(isrow, &is_idx));
3227 
3228   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3229   PetscCall(ISGetBlockSize(isrow, &i));
3230   PetscCall(ISSetBlockSize(*isrow_d, i));
3231 
3232   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3233   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3234   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3235 
3236   PetscCall(VecDuplicate(lvec, &lcmap));
3237 
3238   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3239   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3240 
3241   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3242   /* off-process column indices */
3243   count = 0;
3244   PetscCall(PetscMalloc1(Bn, &idx));
3245   PetscCall(PetscMalloc1(Bn, &cmap1));
3246 
3247   PetscCall(VecGetArray(lvec, &xarray));
3248   PetscCall(VecGetArray(lcmap, &cmaparray));
  /* an entry greater than -1 in the scattered x means that column of B was selected by iscol on its owning rank */
3249   for (i = 0; i < Bn; i++) {
3250     if (PetscRealPart(xarray[i]) > -1.0) {
3251       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3252       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3253       count++;
3254     }
3255   }
3256   PetscCall(VecRestoreArray(lvec, &xarray));
3257   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3258 
  /* PETSC_COPY_VALUES: the index set keeps its own copy, so idx is freed right below */
3259   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3260   /* cannot ensure iscol_o has same blocksize as iscol! */
3261 
3262   PetscCall(PetscFree(idx));
  /* cmap1 is handed to the caller, who must free it */
3263   *garray = cmap1;
3264 
3265   PetscCall(VecDestroy(&x));
3266   PetscCall(VecDestroy(&cmap));
3267   PetscCall(VecDestroy(&lcmap));
3268   PetscFunctionReturn(PETSC_SUCCESS);
3269 }
3270 
3271 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/*
  Input Parameters:
+ mat   - the MPIAIJ matrix to extract from
. isrow - parallel row index set
. iscol - parallel column index set
- call  - MAT_REUSE_MATRIX takes the reuse path; every other value takes the initial-creation path

  Input/Output Parameter:
. submat - created on the initial path; on reuse, the matrix returned by an earlier initial call

  Notes:
  The initial path composes the sequential index sets "isrow_d", "iscol_d" and "iscol_o" with the submatrix.
  The reuse path queries them back and errors with PETSC_ERR_ARG_WRONGSTATE if any of them is missing.
*/
3272 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3273 {
3274   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3275   Mat         M = NULL;
3276   MPI_Comm    comm;
3277   IS          iscol_d, isrow_d, iscol_o;
3278   Mat         Asub = NULL, Bsub = NULL;
3279   PetscInt    n, count, M_size, N_size;
3280 
3281   PetscFunctionBegin;
3282   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3283 
3284   if (call == MAT_REUSE_MATRIX) {
3285     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3286     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3287     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3288 
3289     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3290     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3291 
3292     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3293     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3294 
3295     /* Update diagonal and off-diagonal portions of submat */
3296     asub = (Mat_MPIAIJ *)(*submat)->data;
3297     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    /* the off-diagonal block is refreshed only when iscol_o is not empty */
3298     PetscCall(ISGetLocalSize(iscol_o, &n));
3299     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3300     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3301     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3302 
3303   } else { /* call == MAT_INITIAL_MATRIX */
3304     PetscInt *garray, *garray_compact;
3305     PetscInt  BsubN;
3306 
3307     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3308     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3309 
3310     /* Create local submatrices Asub and Bsub */
3311     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3312     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3313 
3314     // Compact garray so it's not of size Bn
    /* garray_compact is passed to MatCreateMPIAIJWithSeqAIJ(), which takes ownership of it; garray itself is freed below */
3315     PetscCall(ISGetSize(iscol_o, &count));
3316     PetscCall(PetscMalloc1(count, &garray_compact));
3317     PetscCall(PetscArraycpy(garray_compact, garray, count));
3318 
3319     /* Create submatrix M */
3320     PetscCall(ISGetSize(isrow, &M_size));
3321     PetscCall(ISGetSize(iscol, &N_size));
3322     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));
3323 
3324     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3325     asub = (Mat_MPIAIJ *)M->data;
3326 
3327     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3328     n = asub->B->cmap->N;
3329     if (BsubN > n) {
3330       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3331       const PetscInt *idx;
3332       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3333       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3334 
3335       PetscCall(PetscMalloc1(n, &idx_new));
3336       j = 0;
3337       PetscCall(ISGetIndices(iscol_o, &idx));
      /* merge-style walk: for each column kept in asub->garray find the matching entry of garray and keep its iscol_o index */
      /* NOTE(review): the while loop below has no j < BsubN bound; it appears to rely on every subgarray[i] occurring
         in the first BsubN entries of garray - confirm, since only those entries of garray are initialized */
3338       for (i = 0; i < n; i++) {
3339         if (j >= BsubN) break;
3340         while (subgarray[i] > garray[j]) j++;
3341 
3342         if (subgarray[i] == garray[j]) {
3343           idx_new[i] = idx[j++];
3344         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3345       }
3346       PetscCall(ISRestoreIndices(iscol_o, &idx));
3347 
      /* replace iscol_o by the compressed index set; idx_new is owned by the new index set */
3348       PetscCall(ISDestroy(&iscol_o));
3349       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3350 
3351     } else if (BsubN < n) {
3352       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3353     }
3354 
3355     PetscCall(PetscFree(garray));
3356     *submat = M;
3357 
3358     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    /* each index set is composed with M and then the local reference is dropped */
3359     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3360     PetscCall(ISDestroy(&isrow_d));
3361 
3362     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3363     PetscCall(ISDestroy(&iscol_d));
3364 
3365     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3366     PetscCall(ISDestroy(&iscol_o));
3367   }
3368   PetscFunctionReturn(PETSC_SUCCESS);
3369 }
3370 
/*
  MatCreateSubMatrix_MPIAIJ - extracts the submatrix of mat selected by isrow and iscol, choosing one of three
  implementations.

  Input Parameters:
+ mat   - the MPIAIJ matrix
. isrow - parallel row index set
. iscol - parallel column index set
- call  - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Input/Output Parameter:
. newmat - the submatrix; on reuse it must come from an earlier initial call

  Notes:
  Implementation choice:
  1. MatCreateSubMatrix_MPIAIJ_SameRowColDist() when isrow and iscol both lie inside the local ownership ranges
     on every rank (on reuse: when "isrow_d" is composed with *newmat).
  2. MatCreateSubMatrix_MPIAIJ_SameRowDist() when only isrow does and the gathered column index set is sorted
     (on reuse: when "SubIScol" is composed with *newmat).
  3. MatCreateSubMatrix_MPIAIJ_nonscalable() otherwise; the gathered column index set is composed with the
     result as "ISAllGather" (on reuse it is queried back, and its absence is an error).
*/
3371 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3372 {
3373   IS        iscol_local = NULL, isrow_d;
3374   PetscInt  csize;
3375   PetscInt  n, i, j, start, end;
3376   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3377   MPI_Comm  comm;
3378 
3379   PetscFunctionBegin;
3380   /* If isrow has same processor distribution as mat,
3381      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3382   if (call == MAT_REUSE_MATRIX) {
    /* on reuse the path taken at creation time is recognized by which object is composed with *newmat;
       tsameDist[1] is assigned only on the branches that also set sameRowDist, the only case in which it is read below */
3383     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3384     if (isrow_d) {
3385       sameRowDist  = PETSC_TRUE;
3386       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3387     } else {
3388       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3389       if (iscol_local) {
3390         sameRowDist  = PETSC_TRUE;
3391         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3392       }
3393     }
3394   } else {
3395     /* Check if isrow has same processor distribution as mat */
    /* an empty local index set counts as matching; otherwise its min and max must lie in the local ownership range */
3396     sameDist[0] = PETSC_FALSE;
3397     PetscCall(ISGetLocalSize(isrow, &n));
3398     if (!n) {
3399       sameDist[0] = PETSC_TRUE;
3400     } else {
3401       PetscCall(ISGetMinMax(isrow, &i, &j));
3402       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3403       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3404     }
3405 
3406     /* Check if iscol has same processor distribution as mat */
3407     sameDist[1] = PETSC_FALSE;
3408     PetscCall(ISGetLocalSize(iscol, &n));
3409     if (!n) {
3410       sameDist[1] = PETSC_TRUE;
3411     } else {
3412       PetscCall(ISGetMinMax(iscol, &i, &j));
3413       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3414       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3415     }
3416 
    /* logical AND over all ranks: a distribution only counts as matching if it matches everywhere */
3417     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3418     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3419     sameRowDist = tsameDist[0];
3420   }
3421 
3422   if (sameRowDist) {
3423     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3424       /* isrow and iscol have same processor distribution as mat */
3425       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3426       PetscFunctionReturn(PETSC_SUCCESS);
3427     } else { /* sameRowDist */
3428       /* isrow has same processor distribution as mat */
3429       if (call == MAT_INITIAL_MATRIX) {
3430         PetscBool sorted;
3431         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3432         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3433         PetscCall(ISGetSize(iscol, &i));
3434         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3435 
3436         PetscCall(ISSorted(iscol_local, &sorted));
3437         if (sorted) {
3438           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3439           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3440           PetscFunctionReturn(PETSC_SUCCESS);
3441         }
        /* not sorted: fall through to the general case below, which reuses the iscol_local gathered here */
3442       } else { /* call == MAT_REUSE_MATRIX */
3443         IS iscol_sub;
3444         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3445         if (iscol_sub) {
3446           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3447           PetscFunctionReturn(PETSC_SUCCESS);
3448         }
3449       }
3450     }
3451   }
3452 
3453   /* General case: iscol -> iscol_local which has global size of iscol */
3454   if (call == MAT_REUSE_MATRIX) {
3455     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3456     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3457   } else {
3458     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3459   }
3460 
3461   PetscCall(ISGetLocalSize(iscol, &csize));
3462   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3463 
  /* keep the gathered column index set with the new matrix for later reuse calls, then drop the local reference */
3464   if (call == MAT_INITIAL_MATRIX) {
3465     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3466     PetscCall(ISDestroy(&iscol_local));
3467   }
3468   PetscFunctionReturn(PETSC_SUCCESS);
3469 }
3470 
3471 /*@C
3472   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix from two `MATSEQAIJ` matrices holding the "diagonal"
3473   and "off-diagonal" parts of the matrix in CSR format.
3474 
3475   Collective
3476 
3477   Input Parameters:
3478 + comm   - MPI communicator
3479 . M      - the global row size
3480 . N      - the global column size
3481 . A      - "diagonal" portion of matrix
3482 . B      - if `garray` is `NULL`, `B` should be the offdiag matrix using global col ids and of size N - if `garray` is not `NULL`, `B` should be the offdiag matrix using local col ids and of size `garray`
3483 - garray - either `NULL` or the global index of `B` columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter.
3484 
3485   Output Parameter:
3486 . mat - the matrix, with input `A` as its local diagonal matrix
3487 
3488   Level: advanced
3489 
3490   Notes:
3491   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3492 
3493   `A` and `B` become part of the output `mat`. The user cannot use `A` and `B` anymore.
3494 
3495   If `garray` is `NULL`, `B` will be compacted to use local indices. In this sense, `B`'s sparsity pattern (nonzerostate) will be changed. If `B` is a device matrix, we need to somehow also update
3496   `B`'s copy on device.  We do so by increasing `B`'s nonzerostate. When `B` is used on device, device matrix types should detect this change (ref. internal routines `MatSeqAIJCUSPARSECopyToGPU()` or
3497   `MatAssemblyEnd_SeqAIJKokkos()`) and will just destroy and then recreate the device copy of `B`. It is not optimal, but is easy to implement and less hacky. To avoid this overhead, try to compute `garray`
3498   yourself, see algorithms in the private function `MatSetUpMultiply_MPIAIJ()`.
3499 
3500 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3501 @*/
3502 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
3503 {
3504   PetscInt Arows, Acols;
3505   MatType  parallel_type;
3506 
3507   PetscFunctionBegin;
3508   PetscCall(MatCreate(comm, mat));
3509   PetscCall(MatGetSize(A, &Arows, &Acols));
3510   PetscCheck(B->rmap->N == Arows, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, Arows, B->rmap->N);
3511   PetscCheck(B->rmap->bs == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3512 
3513   PetscCall(MatSetSizes(*mat, Arows, Acols, M, N));
3514   /* The parallel matrix type is derived from the type of A, the "diagonal" block */
3515   PetscCall(MatGetMPIMatType_Private(A, &parallel_type));
3516   PetscCall(MatSetType(*mat, parallel_type));
3517 
3518   PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3519 
3520   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3521   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3522   if (garray == NULL) {
3523     const PetscScalar *Bvalues;
3524 
3525     B->nonzerostate += 1;
3526     PetscCall(MatSeqAIJGetArrayRead(B, &Bvalues)); /* B's device copy is going to be destroyed, so bring the host copy up to date first */
3527     PetscCall(MatSeqAIJRestoreArrayRead(B, &Bvalues));
3528   }
3529   PetscCall(MatSetMPIAIJWithSplitSeqAIJ(*mat, A, B, garray));
3530   PetscFunctionReturn(PETSC_SUCCESS);
3531 }
3532 
3533 /*
3534   MatSetMPIAIJWithSplitSeqAIJ - Set the diag and offdiag matrices of a `MATMPIAIJ` matrix.
3535    It is similar to `MatCreateMPIAIJWithSplitArrays()`. This routine allows passing in
3536    B with local indices and the correct size, along with the accompanying
3537    garray, hence skipping compactification
3538 
3539   Collective
3540 
3541   Input Parameters:
3542 +  mat    - the MATMPIAIJ matrix, which should have its type and layout set, but should not have its diag, offdiag matrices set
3543 .  A      - the diag matrix using local col ids
3544 .  B      - if `garray` is `NULL`, `B` should be the offdiag matrix using global col ids and of size N; if `garray` is not `NULL`, `B` should be the offdiag matrix using local col ids, with its size matching the length of `garray`
3545 -  garray - either `NULL` or the global index of `B` columns
3546 
3547   Output Parameter:
3548 .  mat   - the updated `MATMPIAIJ` matrix
3549 
3550   Level: advanced
3551 
3552   Notes:
3553   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3554 
3555   `A` and `B` become part of output mat. The user cannot use `A` and `B` anymore.
3556 
3557 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3558 */
3559 PETSC_INTERN PetscErrorCode MatSetMPIAIJWithSplitSeqAIJ(Mat mat, Mat A, Mat B, PetscInt *garray)
3560 {
3561   PetscFunctionBegin;
3562   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
3563   PetscInt    m, n, M, N, Am, An, Bm, Bn;
3564 
3565   PetscCall(MatGetSize(mat, &M, &N));
3566   PetscCall(MatGetLocalSize(mat, &m, &n));
3567   PetscCall(MatGetLocalSize(A, &Am, &An));
3568   PetscCall(MatGetLocalSize(B, &Bm, &Bn));
3569 
3570   PetscCheck(m == Am && m == Bm, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of rows do not match");
3571   PetscCheck(n == An, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of columns do not match");
3572   PetscCheck(!mpiaij->A && !mpiaij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "A, B of the MPIAIJ matrix are not empty");
3573   mpiaij->A      = A;
3574   mpiaij->B      = B;
3575   mpiaij->garray = garray;
3576 
3577   mat->preallocated     = PETSC_TRUE;
3578   mat->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */
3579 
3580   PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3581   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3582   /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and
3583    also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced
3584    */
3585   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3586   PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3587   PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3588   PetscFunctionReturn(PETSC_SUCCESS);
3589 }
3590 
/* Defined elsewhere; the two MatCreateSubMatrix_MPIAIJ_*() routines that follow call it with a single row IS and a
   single column IS to obtain the sequential matrix they later copy into the parallel result */
3591 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3592 
/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - builds (`MAT_INITIAL_MATRIX`) or refills (`MAT_REUSE_MATRIX`) the parallel
  submatrix `newmat` of `mat` selected by `isrow` and `iscol`.

  Steps, as numbered in the body:
  (1)-(2) reduce the sequential `iscol_local` to `iscol_sub` (the requested columns that lie in this process's
          diagonal column range or appear in `garray`) and record in `iscmap` the position of each kept column
          within `iscol_local`, which is its column index in the submatrix
  (3)     extract the sequential matrix `Msub` with MatCreateSubMatrices_MPIAIJ_SingleIS_Local()
  (4)     create and preallocate the parallel result from the row lengths of `Msub`
  (5)     copy `Msub` row by row into the result, translating its column indices through `iscmap`

  On the initial call `Msub`, `iscol_sub`, `iscmap` and (if non-NULL) `iscol_local` are composed on `newmat` under the
  names "SubMatrix", "SubIScol", "Subcmap" and "ISAllGather"; the reuse path queries the first three back.
  Note that the initial path also calls ISDestroy() on the local `iscol_local` handle after composing it.
*/
3593 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3594 {
3595   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3596   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3597   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3598   Mat             M, Msub, B = a->B;
3599   MatScalar      *aa;
3600   Mat_SeqAIJ     *aij;
3601   PetscInt       *garray = a->garray, *colsub, Ncols;
3602   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3603   IS              iscol_sub, iscmap;
3604   const PetscInt *is_idx, *cmap;
3605   PetscBool       allcolumns = PETSC_FALSE;
3606   MPI_Comm        comm;
3607 
3608   PetscFunctionBegin;
3609   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3610   if (call == MAT_REUSE_MATRIX) {
       /* Recover the objects that the initial call composed on *newmat; each one is required */
3611     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3612     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3613     PetscCall(ISGetLocalSize(iscol_sub, &count));
3614 
3615     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3616     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3617 
3618     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3619     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3620 
       /* Refill the stored sequential matrix; allcolumns is passed as PETSC_FALSE on this path */
3621     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3622 
3623   } else { /* call == MAT_INITIAL_MATRIX */
3624     PetscBool flg;
3625 
3626     PetscCall(ISGetLocalSize(iscol, &n));
3627     PetscCall(ISGetSize(iscol, &Ncols));
3628 
3629     /* (1) iscol -> nonscalable iscol_local */
3630     /* Check for special case: each processor gets entire matrix columns */
3631     PetscCall(ISIdentity(iscol_local, &flg));
3632     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
       /* Logical AND over the communicator: the shortcut is taken only if the condition holds on every process */
3633     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3634     if (allcolumns) {
         /* iscol_sub aliases iscol_local, so take an extra reference to balance the ISDestroy(&iscol_sub) below */
3635       iscol_sub = iscol_local;
3636       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3637       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3638 
3639     } else {
3640       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3641       PetscInt *idx, *cmap1, k;
3642       PetscCall(PetscMalloc1(Ncols, &idx));
3643       PetscCall(PetscMalloc1(Ncols, &cmap1));
3644       PetscCall(ISGetIndices(iscol_local, &is_idx));
3645       count = 0;
3646       k     = 0;
         /* k is a cursor into garray that only moves forward.
            NOTE(review): besides the sorted iscol_local required above, this presumably relies on garray being in
            ascending order - confirm */
3647       for (i = 0; i < Ncols; i++) {
3648         j = is_idx[i];
3649         if (j >= cstart && j < cend) {
3650           /* diagonal part of mat */
3651           idx[count]     = j;
3652           cmap1[count++] = i; /* column index in submat */
3653         } else if (Bn) {
3654           /* off-diagonal part of mat */
3655           if (j == garray[k]) {
3656             idx[count]     = j;
3657             cmap1[count++] = i; /* column index in submat */
3658           } else if (j > garray[k]) {
               /* advance the cursor, stopping at the last entry of garray at the latest */
3659             while (j > garray[k] && k < Bn - 1) k++;
3660             if (j == garray[k]) {
3661               idx[count]     = j;
3662               cmap1[count++] = i; /* column index in submat */
3663             }
3664           }
3665         }
3666       }
3667       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3668 
         /* PETSC_OWN_POINTER: the index sets take over idx and cmap1, so they are not freed here */
3669       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3670       PetscCall(ISGetBlockSize(iscol, &cbs));
3671       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3672 
3673       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3674     }
3675 
3676     /* (3) Create sequential Msub */
3677     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3678   }
3679 
     /* Common to both paths: count sizes the colsub work array used in step (5) */
3680   PetscCall(ISGetLocalSize(iscol_sub, &count));
3681   aij = (Mat_SeqAIJ *)Msub->data;
3682   ii  = aij->i;
3683   PetscCall(ISGetIndices(iscmap, &cmap));
3684 
3685   /*
3686       m - number of local rows
3687       Ncols - number of columns (same on all processors)
3688       rstart - first row in new global matrix generated
3689   */
3690   PetscCall(MatGetSize(Msub, &m, NULL));
3691 
3692   if (call == MAT_INITIAL_MATRIX) {
3693     /* (4) Create parallel newmat */
3694     PetscMPIInt rank, size;
3695     PetscInt    csize;
3696 
3697     PetscCallMPI(MPI_Comm_size(comm, &size));
3698     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3699 
3700     /*
3701         Determine the number of non-zeros in the diagonal and off-diagonal
3702         portions of the matrix in order to do correct preallocation
3703     */
3704 
3705     /* first get start and end of "diagonal" columns */
3706     PetscCall(ISGetLocalSize(iscol, &csize));
3707     if (csize == PETSC_DECIDE) {
3708       PetscCall(ISGetSize(isrow, &mglobal));
3709       if (mglobal == Ncols) { /* square matrix */
3710         nlocal = m;
3711       } else {
           /* even split of Ncols, with the first (Ncols % size) ranks getting one extra column */
3712         nlocal = Ncols / size + ((Ncols % size) > rank);
3713       }
3714     } else {
3715       nlocal = csize;
3716     }
       /* rend is the running sum of nlocal up to and including this rank; here rstart and rend delimit the
          "diagonal" COLUMN range of the new matrix (rstart is overwritten with the row start in step (5)) */
3717     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3718     rstart = rend - nlocal;
3719     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3720 
3721     /* next, compute all the lengths */
3722     jj = aij->j;
       /* one allocation holds both arrays: olens is the second half of dlens */
3723     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3724     olens = dlens + m;
3725     for (i = 0; i < m; i++) {
3726       jend = ii[i + 1] - ii[i];
3727       olen = 0;
3728       dlen = 0;
3729       for (j = 0; j < jend; j++) {
           /* classify by the column index in the submatrix (cmap), not by the index local to Msub */
3730         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3731         else dlen++;
3732         jj++;
3733       }
3734       olens[i] = olen;
3735       dlens[i] = dlen;
3736     }
3737 
3738     PetscCall(ISGetBlockSize(isrow, &bs));
3739     PetscCall(ISGetBlockSize(iscol, &cbs));
3740 
3741     PetscCall(MatCreate(comm, &M));
3742     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3743     PetscCall(MatSetBlockSizes(M, bs, cbs));
3744     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3745     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3746     PetscCall(PetscFree(dlens));
3747 
3748   } else { /* call == MAT_REUSE_MATRIX */
3749     M = *newmat;
       /* i is used here as scratch for the local row count of the existing matrix */
3750     PetscCall(MatGetLocalSize(M, &i, NULL));
3751     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3752     PetscCall(MatZeroEntries(M));
3753     /*
3754          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3755        rather than the slower MatSetValues().
3756     */
3757     M->was_assembled = PETSC_TRUE;
3758     M->assembled     = PETSC_FALSE;
3759   }
3760 
3761   /* (5) Set values of Msub to *newmat */
3762   PetscCall(PetscMalloc1(count, &colsub));
3763   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3764 
3765   jj = aij->j;
3766   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3767   for (i = 0; i < m; i++) {
3768     row = rstart + i;
3769     nz  = ii[i + 1] - ii[i];
       /* translate the column indices of row i of Msub into column indices of the submatrix */
3770     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3771     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3772     jj += nz;
3773     aa += nz;
3774   }
     /* note: aa has been advanced past the last row by the loop above when it is handed back here */
3775   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3776   PetscCall(ISRestoreIndices(iscmap, &cmap));
3777 
3778   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3779   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3780 
3781   PetscCall(PetscFree(colsub));
3782 
3783   /* save Msub, iscol_sub and iscmap used in processor for next request */
3784   if (call == MAT_INITIAL_MATRIX) {
3785     *newmat = M;
       /* Each object is composed on the result and then our own handle is destroyed.
          NOTE(review): this presumes PetscObjectCompose() takes its own reference so the objects outlive the
          destroy calls - confirm against the PETSc documentation */
3786     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3787     PetscCall(MatDestroy(&Msub));
3788 
3789     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3790     PetscCall(ISDestroy(&iscol_sub));
3791 
3792     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3793     PetscCall(ISDestroy(&iscmap));
3794 
3795     if (iscol_local) {
3796       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3797       PetscCall(ISDestroy(&iscol_local));
3798     }
3799   }
3800   PetscFunctionReturn(PETSC_SUCCESS);
3801 }
3802 
3803 /*
3804     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3805   matrix, and then the end result obtained by concatenating the local matrices.
3806   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3807 
3808   This requires a sequential iscol with all indices.
3809 */
/*
  Parameters, as used by the body:
    mat    - the parallel matrix the submatrix is taken from
    isrow  - rows to extract
    iscol  - columns to extract, passed unchanged to MatCreateSubMatrices_MPIAIJ_SingleIS_Local()
    csize  - local number of columns of the result, or PETSC_DECIDE to have it computed here
    call   - MAT_INITIAL_MATRIX creates *newmat; MAT_REUSE_MATRIX refills an existing *newmat, which must carry the
             sequential matrix composed under the name "SubMatrix" by an earlier initial call
    newmat - the resulting parallel matrix
*/
3810 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3811 {
3812   PetscMPIInt rank, size;
3813   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3814   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3815   Mat         M, Mreuse;
3816   MatScalar  *aa, *vwork;
3817   MPI_Comm    comm;
3818   Mat_SeqAIJ *aij;
3819   PetscBool   colflag, allcolumns = PETSC_FALSE;
3820 
3821   PetscFunctionBegin;
3822   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3823   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3824   PetscCallMPI(MPI_Comm_size(comm, &size));
3825 
3826   /* Check for special case: each processor gets entire matrix columns */
3827   PetscCall(ISIdentity(iscol, &colflag));
3828   PetscCall(ISGetLocalSize(iscol, &n));
3829   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
     /* Logical AND over the communicator: the shortcut is taken only if the condition holds on every process */
3830   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3831 
3832   if (call == MAT_REUSE_MATRIX) {
       /* refill the sequential matrix stored on *newmat by the initial call */
3833     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3834     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3835     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3836   } else {
3837     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3838   }
3839 
3840   /*
3841       m - number of local rows
3842       n - number of columns (same on all processors)
3843       rstart - first row in new global matrix generated
3844   */
     /* n is overwritten here: from this point on it is the column count of Mreuse, not the local size of iscol */
3845   PetscCall(MatGetSize(Mreuse, &m, &n));
3846   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3847   if (call == MAT_INITIAL_MATRIX) {
3848     aij = (Mat_SeqAIJ *)Mreuse->data;
3849     ii  = aij->i;
3850     jj  = aij->j;
3851 
3852     /*
3853         Determine the number of non-zeros in the diagonal and off-diagonal
3854         portions of the matrix in order to do correct preallocation
3855     */
3856 
3857     /* first get start and end of "diagonal" columns */
3858     if (csize == PETSC_DECIDE) {
3859       PetscCall(ISGetSize(isrow, &mglobal));
3860       if (mglobal == n) { /* square matrix */
3861         nlocal = m;
3862       } else {
           /* even split of n, with the first (n % size) ranks getting one extra column */
3863         nlocal = n / size + ((n % size) > rank);
3864       }
3865     } else {
3866       nlocal = csize;
3867     }
       /* rend is the running sum of nlocal up to and including this rank; here rstart and rend delimit the
          "diagonal" COLUMN range of the new matrix (both are overwritten with the row range further down) */
3868     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3869     rstart = rend - nlocal;
3870     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3871 
3872     /* next, compute all the lengths */
       /* one allocation holds both arrays: olens is the second half of dlens */
3873     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3874     olens = dlens + m;
3875     for (i = 0; i < m; i++) {
3876       jend = ii[i + 1] - ii[i];
3877       olen = 0;
3878       dlen = 0;
3879       for (j = 0; j < jend; j++) {
           /* the column indices of Mreuse are compared directly against the new column range */
3880         if (*jj < rstart || *jj >= rend) olen++;
3881         else dlen++;
3882         jj++;
3883       }
3884       olens[i] = olen;
3885       dlens[i] = dlen;
3886     }
3887     PetscCall(MatCreate(comm, &M));
3888     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3889     PetscCall(MatSetBlockSizes(M, bs, cbs));
3890     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3891     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3892     PetscCall(PetscFree(dlens));
3893   } else {
3894     PetscInt ml, nl;
3895 
3896     M = *newmat;
       /* only the local row count ml is checked; nl is retrieved but not used */
3897     PetscCall(MatGetLocalSize(M, &ml, &nl));
3898     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3899     PetscCall(MatZeroEntries(M));
3900     /*
3901          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3902        rather than the slower MatSetValues().
3903     */
3904     M->was_assembled = PETSC_TRUE;
3905     M->assembled     = PETSC_FALSE;
3906   }
     /* from here on rstart/rend are the ROW ownership range of M; the CSR pointers are (re)loaded because the
        initial path advanced jj while counting and the reuse path has not set them yet */
3907   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3908   aij = (Mat_SeqAIJ *)Mreuse->data;
3909   ii  = aij->i;
3910   jj  = aij->j;
3911 
3912   /* trigger copy to CPU if needed */
3913   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3914   for (i = 0; i < m; i++) {
       /* cwork/vwork point at the column indices and values of row i; jj and aa are then stepped to the next row */
3915     row   = rstart + i;
3916     nz    = ii[i + 1] - ii[i];
3917     cwork = jj;
3918     jj    = PetscSafePointerPlusOffset(jj, nz);
3919     vwork = aa;
3920     aa    = PetscSafePointerPlusOffset(aa, nz);
3921     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3922   }
     /* note: aa has been advanced past the last row by the loop above when it is handed back here */
3923   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3924 
3925   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3926   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3927   *newmat = M;
3928 
3929   /* save submatrix used in processor for next request */
3930   if (call == MAT_INITIAL_MATRIX) {
       /* NOTE(review): presumes PetscObjectCompose() takes its own reference so Mreuse outlives the destroy - confirm */
3931     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3932     PetscCall(MatDestroy(&Mreuse));
3933   }
3934   PetscFunctionReturn(PETSC_SUCCESS);
3935 }
3936 
3937 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3938 {
3939   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3940   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3941   const PetscInt *JJ;
3942   PetscBool       nooffprocentries;
3943   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3944 
3945   PetscFunctionBegin;
3946   PetscCall(PetscLayoutSetUp(B->rmap));
3947   PetscCall(PetscLayoutSetUp(B->cmap));
3948   m       = B->rmap->n;
3949   cstart  = B->cmap->rstart;
3950   cend    = B->cmap->rend;
3951   rstart  = B->rmap->rstart;
3952   irstart = Ii[0];
3953 
3954   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3955 
3956   if (PetscDefined(USE_DEBUG)) {
3957     for (i = 0; i < m; i++) {
3958       nnz = Ii[i + 1] - Ii[i];
3959       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3960       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3961       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3962       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3963     }
3964   }
3965 
3966   for (i = 0; i < m; i++) {
3967     nnz     = Ii[i + 1] - Ii[i];
3968     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3969     nnz_max = PetscMax(nnz_max, nnz);
3970     d       = 0;
3971     for (j = 0; j < nnz; j++) {
3972       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3973     }
3974     d_nnz[i] = d;
3975     o_nnz[i] = nnz - d;
3976   }
3977   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3978   PetscCall(PetscFree2(d_nnz, o_nnz));
3979 
3980   for (i = 0; i < m; i++) {
3981     ii = i + rstart;
3982     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3983   }
3984   nooffprocentries    = B->nooffprocentries;
3985   B->nooffprocentries = PETSC_TRUE;
3986   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3987   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3988   B->nooffprocentries = nooffprocentries;
3989 
3990   /* count number of entries below block diagonal */
3991   PetscCall(PetscFree(Aij->ld));
3992   PetscCall(PetscCalloc1(m, &ld));
3993   Aij->ld = ld;
3994   for (i = 0; i < m; i++) {
3995     nnz = Ii[i + 1] - Ii[i];
3996     j   = 0;
3997     while (j < nnz && J[j] < cstart) j++;
3998     ld[i] = j;
3999     if (J) J += nnz;
4000   }
4001 
4002   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
4003   PetscFunctionReturn(PETSC_SUCCESS);
4004 }
4005 
4006 /*@
4007   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
4008   (the default parallel PETSc format).
4009 
4010   Collective
4011 
4012   Input Parameters:
4013 + B - the matrix
4014 . i - the indices into `j` for the start of each local row (indices start with zero)
4015 . j - the column indices for each local row (indices start with zero)
4016 - v - optional values in the matrix
4017 
4018   Level: developer
4019 
4020   Notes:
4021   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
4022   thus you CANNOT change the matrix entries by changing the values of `v` after you have
4023   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4024 
4025   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4026 
4027   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
4028 
4029   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
4030 
4031   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
4032   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4033 
4034   The format which is used for the sparse matrix input, is equivalent to a
4035   row-major ordering, i.e., for the following matrix, the input data expected is
4036   as shown
4037 .vb
4038         1 0 0
4039         2 0 3     P0
4040        -------
4041         4 5 6     P1
4042 
4043      Process0 [P0] rows_owned=[0,1]
4044         i =  {0,1,3}  [size = nrow+1  = 2+1]
4045         j =  {0,0,2}  [size = 3]
4046         v =  {1,2,3}  [size = 3]
4047 
4048      Process1 [P1] rows_owned=[2]
4049         i =  {0,3}    [size = nrow+1  = 1+1]
4050         j =  {0,1,2}  [size = 3]
4051         v =  {4,5,6}  [size = 3]
4052 .ve
4053 
4054 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4055           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4056 @*/
4057 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4058 {
4059   PetscFunctionBegin;
     /* Pure dispatch: the arguments are forwarded unchanged to the function registered on B under the name
        "MatMPIAIJSetPreallocationCSR_C"; no argument validation is done at this level.
        NOTE(review): PetscTryMethod() presumably does nothing when B's type provides no such function - confirm */
4060   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4061   PetscFunctionReturn(PETSC_SUCCESS);
4062 }
4063 
4064 /*@
4065   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4066   (the default parallel PETSc format).  For good matrix assembly performance
4067   the user should preallocate the matrix storage by setting the parameters
4068   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4069 
4070   Collective
4071 
4072   Input Parameters:
4073 + B     - the matrix
4074 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4075            (same value is used for all local rows)
4076 . d_nnz - array containing the number of nonzeros in the various rows of the
4077            DIAGONAL portion of the local submatrix (possibly different for each row)
4078            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4079            The size of this array is equal to the number of local rows, i.e 'm'.
4080            For matrices that will be factored, you must leave room for (and set)
4081            the diagonal entry even if it is zero.
4082 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4083            submatrix (same value is used for all local rows).
4084 - o_nnz - array containing the number of nonzeros in the various rows of the
4085            OFF-DIAGONAL portion of the local submatrix (possibly different for
4086            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4087            structure. The size of this array is equal to the number
4088            of local rows, i.e 'm'.
4089 
4090   Example Usage:
4091   Consider the following 8x8 matrix with 34 non-zero values, that is
4092   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4093   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4094   as follows
4095 
4096 .vb
4097             1  2  0  |  0  3  0  |  0  4
4098     Proc0   0  5  6  |  7  0  0  |  8  0
4099             9  0 10  | 11  0  0  | 12  0
4100     -------------------------------------
4101            13  0 14  | 15 16 17  |  0  0
4102     Proc1   0 18  0  | 19 20 21  |  0  0
4103             0  0  0  | 22 23  0  | 24  0
4104     -------------------------------------
4105     Proc2  25 26 27  |  0  0 28  | 29  0
4106            30  0  0  | 31 32 33  |  0 34
4107 .ve
4108 
4109   This can be represented as a collection of submatrices as
4110 .vb
4111       A B C
4112       D E F
4113       G H I
4114 .ve
4115 
4116   Where the submatrices A,B,C are owned by proc0, D,E,F are
4117   owned by proc1, G,H,I are owned by proc2.
4118 
4119   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4120   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4121   The 'M','N' parameters are 8,8, and have the same values on all procs.
4122 
4123   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4124   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4125   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4126   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4127   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4128   matrix, and [DF] as another `MATSEQAIJ` matrix.
4129 
4130   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4131   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4132   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4133   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4134   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4135   In this case, the values of `d_nz`, `o_nz` are
4136 .vb
4137      proc0  d_nz = 2, o_nz = 2
4138      proc1  d_nz = 3, o_nz = 2
4139      proc2  d_nz = 1, o_nz = 4
4140 .ve
4141   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4142   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4143   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4144   34 values.
4145 
4146   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4147   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4148   In the above case the values for `d_nnz`, `o_nnz` are
4149 .vb
4150      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4151      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4152      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4153 .ve
4154   Here the space allocated is sum of all the above values i.e 34, and
4155   hence pre-allocation is perfect.
4156 
4157   Level: intermediate
4158 
4159   Notes:
4160   If the *_nnz parameter is given then the *_nz parameter is ignored
4161 
4162   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4163   storage.  The stored row and column indices begin with zero.
4164   See [Sparse Matrices](sec_matsparse) for details.
4165 
4166   The parallel matrix is partitioned such that the first m0 rows belong to
4167   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4168   to process 2, etc., where m0, m1, m2, ... are the values of the input parameter 'm'.
4169 
4170   The DIAGONAL portion of the local submatrix of a processor can be defined
4171   as the submatrix which is obtained by extracting the part corresponding to
4172   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4173   first row that belongs to the processor, r2 is the last row belonging to
4174   this processor, and c1-c2 is the range of indices of the local part of a
4175   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4176   common case of a square matrix, the row and column ranges are the same and
4177   the DIAGONAL part is also square. The remaining portion of the local
4178   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4179 
4180   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4181 
4182   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4183   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4184   You can also run with the option `-info` and look for messages with the string
4185   malloc in them to see if additional memory allocation was needed.
4186 
4187 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4188           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4189 @*/
4190 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4191 {
4192   PetscFunctionBegin;
     /* B must be a valid Mat whose type has already been set; the count arguments are not inspected here */
4193   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4194   PetscValidType(B, 1);
     /* Forward unchanged to the function registered on B under the name "MatMPIAIJSetPreallocation_C".
        NOTE(review): PetscTryMethod() presumably does nothing when B's type provides no such function - confirm */
4195   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4196   PetscFunctionReturn(PETSC_SUCCESS);
4197 }
4198 
4199 /*@
4200   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4201   CSR format.
4202 
4203   Collective
4204 
4205   Input Parameters:
4206 + comm - MPI communicator
4207 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4208 . n    - This value should be the same as the local size used in creating the
4209          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4210          calculated if `N` is given) For square matrices n is almost always `m`.
4211 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4212 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4213 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4214 . j    - global column indices
4215 - a    - optional matrix values
4216 
4217   Output Parameter:
4218 . mat - the matrix
4219 
4220   Level: intermediate
4221 
4222   Notes:
4223   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4224   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4225   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4226 
4227   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4228 
4229   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4230 
4231   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4232   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4233 
4234   The format which is used for the sparse matrix input, is equivalent to a
4235   row-major ordering, i.e., for the following matrix, the input data expected is
4236   as shown
4237 .vb
4238         1 0 0
4239         2 0 3     P0
4240        -------
4241         4 5 6     P1
4242 
4243      Process0 [P0] rows_owned=[0,1]
4244         i =  {0,1,3}  [size = nrow+1  = 2+1]
4245         j =  {0,0,2}  [size = 3]
4246         v =  {1,2,3}  [size = 3]
4247 
4248      Process1 [P1] rows_owned=[2]
4249         i =  {0,3}    [size = nrow+1  = 1+1]
4250         j =  {0,1,2}  [size = 3]
4251         v =  {4,5,6}  [size = 3]
4252 .ve
4253 
4254 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4255           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4256 @*/
4257 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4258 {
4259   PetscFunctionBegin;
4260   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4261   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4262   PetscCall(MatCreate(comm, mat));
4263   PetscCall(MatSetSizes(*mat, m, n, M, N));
4264   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4265   PetscCall(MatSetType(*mat, MATMPIAIJ));
4266   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4267   PetscFunctionReturn(PETSC_SUCCESS);
4268 }
4269 
4270 /*@
4271   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4272   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4273   to `MatCreateMPIAIJWithArrays()`
4274 
4275   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4276 
4277   Collective
4278 
4279   Input Parameters:
4280 + mat - the matrix
4281 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4282 . n   - This value should be the same as the local size used in creating the
4283        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4284        calculated if N is given) For square matrices n is almost always m.
4285 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4286 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4287 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4288 . J   - column indices
4289 - v   - matrix values
4290 
4291   Level: deprecated
4292 
4293 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4294           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4295 @*/
4296 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4297 {
4298   PetscInt        nnz, i;
4299   PetscBool       nooffprocentries;
4300   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4301   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4302   PetscScalar    *ad, *ao;
4303   PetscInt        ldi, Iii, md;
4304   const PetscInt *Adi = Ad->i;
4305   PetscInt       *ld  = Aij->ld;
4306 
4307   PetscFunctionBegin;
4308   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4309   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4310   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4311   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4312 
4313   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4314   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4315 
4316   for (i = 0; i < m; i++) {
4317     if (PetscDefined(USE_DEBUG)) {
4318       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4319         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4320         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4321       }
4322     }
4323     nnz = Ii[i + 1] - Ii[i];
4324     Iii = Ii[i];
4325     ldi = ld[i];
4326     md  = Adi[i + 1] - Adi[i];
4327     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4328     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4329     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4330     ad += md;
4331     ao += nnz - md;
4332   }
4333   nooffprocentries      = mat->nooffprocentries;
4334   mat->nooffprocentries = PETSC_TRUE;
4335   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4336   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4337   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4338   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4339   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4340   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4341   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4342   mat->nooffprocentries = nooffprocentries;
4343   PetscFunctionReturn(PETSC_SUCCESS);
4344 }
4345 
4346 /*@
4347   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4348 
4349   Collective
4350 
4351   Input Parameters:
4352 + mat - the matrix
4353 - v   - matrix values, stored by row
4354 
4355   Level: intermediate
4356 
4357   Notes:
4358   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4359 
4360   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4361 
4362 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4363           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4364 @*/
4365 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4366 {
4367   PetscInt        nnz, i, m;
4368   PetscBool       nooffprocentries;
4369   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4370   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4371   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4372   PetscScalar    *ad, *ao;
4373   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4374   PetscInt        ldi, Iii, md;
4375   PetscInt       *ld = Aij->ld;
4376 
4377   PetscFunctionBegin;
4378   m = mat->rmap->n;
4379 
4380   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4381   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4382   Iii = 0;
4383   for (i = 0; i < m; i++) {
4384     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4385     ldi = ld[i];
4386     md  = Adi[i + 1] - Adi[i];
4387     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4388     ad += md;
4389     if (ao) {
4390       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4391       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4392       ao += nnz - md;
4393     }
4394     Iii += nnz;
4395   }
4396   nooffprocentries      = mat->nooffprocentries;
4397   mat->nooffprocentries = PETSC_TRUE;
4398   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4399   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4400   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4401   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4402   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4403   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4404   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4405   mat->nooffprocentries = nooffprocentries;
4406   PetscFunctionReturn(PETSC_SUCCESS);
4407 }
4408 
4409 /*@
4410   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4411   (the default parallel PETSc format).  For good matrix assembly performance
4412   the user should preallocate the matrix storage by setting the parameters
4413   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4414 
4415   Collective
4416 
4417   Input Parameters:
4418 + comm  - MPI communicator
4419 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4420           This value should be the same as the local size used in creating the
4421           y vector for the matrix-vector product y = Ax.
4422 . n     - This value should be the same as the local size used in creating the
4423           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4424           calculated if N is given) For square matrices n is almost always m.
4425 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4426 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4427 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4428           (same value is used for all local rows)
4429 . d_nnz - array containing the number of nonzeros in the various rows of the
4430           DIAGONAL portion of the local submatrix (possibly different for each row)
4431           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4432           The size of this array is equal to the number of local rows, i.e 'm'.
4433 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4434           submatrix (same value is used for all local rows).
4435 - o_nnz - array containing the number of nonzeros in the various rows of the
4436           OFF-DIAGONAL portion of the local submatrix (possibly different for
4437           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4438           structure. The size of this array is equal to the number
4439           of local rows, i.e 'm'.
4440 
4441   Output Parameter:
4442 . A - the matrix
4443 
4444   Options Database Keys:
4445 + -mat_no_inode                     - Do not use inodes
4446 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4447 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4448                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4449                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4450 
4451   Level: intermediate
4452 
4453   Notes:
4454   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4455   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4456   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4457 
4458   If the *_nnz parameter is given then the *_nz parameter is ignored
4459 
4460   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4461   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4462   storage requirements for this matrix.
4463 
4464   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4465   processor then it must be used on all processors that share the object for
4466   that argument.
4467 
4468   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4469   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4470 
4471   The user MUST specify either the local or global matrix dimensions
4472   (possibly both).
4473 
4474   The parallel matrix is partitioned across processors such that the
4475   first `m0` rows belong to process 0, the next `m1` rows belong to
4476   process 1, the next `m2` rows belong to process 2, etc., where
4477   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4478   values corresponding to [m x N] submatrix.
4479 
4480   The columns are logically partitioned with the n0 columns belonging
4481   to 0th partition, the next n1 columns belonging to the next
4482   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4483 
4484   The DIAGONAL portion of the local submatrix on any given processor
4485   is the submatrix corresponding to the rows and columns m,n
4486   corresponding to the given processor. i.e diagonal matrix on
4487   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4488   etc. The remaining portion of the local submatrix [m x (N-n)]
4489   constitute the OFF-DIAGONAL portion. The example below better
4490   illustrates this concept. The two matrices, the DIAGONAL portion and
4491   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4492 
4493   For a square global matrix we define each processor's diagonal portion
4494   to be its local rows and the corresponding columns (a square submatrix);
4495   each processor's off-diagonal portion encompasses the remainder of the
4496   local matrix (a rectangular submatrix).
4497 
4498   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4499 
4500   When calling this routine with a single process communicator, a matrix of
4501   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4502   type of communicator, use the construction mechanism
4503 .vb
4504   MatCreate(..., &A);
4505   MatSetType(A, MATMPIAIJ);
4506   MatSetSizes(A, m, n, M, N);
4507   MatMPIAIJSetPreallocation(A, ...);
4508 .ve
4509 
4510   By default, this format uses inodes (identical nodes) when possible.
4511   We search for consecutive rows with the same nonzero structure, thereby
4512   reusing matrix information to achieve increased efficiency.
4513 
4514   Example Usage:
4515   Consider the following 8x8 matrix with 34 non-zero values, that is
4516   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4517   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4518   as follows
4519 
4520 .vb
4521             1  2  0  |  0  3  0  |  0  4
4522     Proc0   0  5  6  |  7  0  0  |  8  0
4523             9  0 10  | 11  0  0  | 12  0
4524     -------------------------------------
4525            13  0 14  | 15 16 17  |  0  0
4526     Proc1   0 18  0  | 19 20 21  |  0  0
4527             0  0  0  | 22 23  0  | 24  0
4528     -------------------------------------
4529     Proc2  25 26 27  |  0  0 28  | 29  0
4530            30  0  0  | 31 32 33  |  0 34
4531 .ve
4532 
4533   This can be represented as a collection of submatrices as
4534 
4535 .vb
4536       A B C
4537       D E F
4538       G H I
4539 .ve
4540 
4541   Where the submatrices A,B,C are owned by proc0, D,E,F are
4542   owned by proc1, G,H,I are owned by proc2.
4543 
4544   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4545   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4546   The 'M','N' parameters are 8,8, and have the same values on all procs.
4547 
4548   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4549   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4550   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4551   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4552   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4553   matrix, and [DF] as another SeqAIJ matrix.
4554 
4555   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4556   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4557   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4558   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4559   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4560   In this case, the values of `d_nz`,`o_nz` are
4561 .vb
4562      proc0  dnz = 2, o_nz = 2
4563      proc1  dnz = 3, o_nz = 2
4564      proc2  dnz = 1, o_nz = 4
4565 .ve
4566   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4567   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4568   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4569   34 values.
4570 
4571   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4572   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4573   In the above case the values for d_nnz,o_nnz are
4574 .vb
4575      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4576      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4577      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4578 .ve
4579   Here the space allocated is sum of all the above values i.e 34, and
4580   hence pre-allocation is perfect.
4581 
4582 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4583           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4584           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4585 @*/
4586 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4587 {
4588   PetscMPIInt size;
4589 
4590   PetscFunctionBegin;
4591   PetscCall(MatCreate(comm, A));
4592   PetscCall(MatSetSizes(*A, m, n, M, N));
4593   PetscCallMPI(MPI_Comm_size(comm, &size));
4594   if (size > 1) {
4595     PetscCall(MatSetType(*A, MATMPIAIJ));
4596     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4597   } else {
4598     PetscCall(MatSetType(*A, MATSEQAIJ));
4599     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4600   }
4601   PetscFunctionReturn(PETSC_SUCCESS);
4602 }
4603 
4604 /*@C
4605   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4606 
4607   Not Collective
4608 
4609   Input Parameter:
4610 . A - The `MATMPIAIJ` matrix
4611 
4612   Output Parameters:
4613 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4614 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4615 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4616 
4617   Level: intermediate
4618 
4619   Note:
4620   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4621   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
4622   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4623   local column numbers to global column numbers in the original matrix.
4624 
4625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4626 @*/
4627 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4628 {
4629   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4630   PetscBool   flg;
4631 
4632   PetscFunctionBegin;
4633   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4634   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4635   if (Ad) *Ad = a->A;
4636   if (Ao) *Ao = a->B;
4637   if (colmap) *colmap = a->garray;
4638   PetscFunctionReturn(PETSC_SUCCESS);
4639 }
4640 
4641 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4642 {
4643   PetscInt     m, N, i, rstart, nnz, Ii;
4644   PetscInt    *indx;
4645   PetscScalar *values;
4646   MatType      rootType;
4647 
4648   PetscFunctionBegin;
4649   PetscCall(MatGetSize(inmat, &m, &N));
4650   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4651     PetscInt *dnz, *onz, sum, bs, cbs;
4652 
4653     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4654     /* Check sum(n) = N */
4655     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4656     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4657 
4658     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4659     rstart -= m;
4660 
4661     MatPreallocateBegin(comm, m, n, dnz, onz);
4662     for (i = 0; i < m; i++) {
4663       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4664       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4665       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4666     }
4667 
4668     PetscCall(MatCreate(comm, outmat));
4669     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4670     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4671     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4672     PetscCall(MatGetRootType_Private(inmat, &rootType));
4673     PetscCall(MatSetType(*outmat, rootType));
4674     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4675     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4676     MatPreallocateEnd(dnz, onz);
4677     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4678   }
4679 
4680   /* numeric phase */
4681   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4682   for (i = 0; i < m; i++) {
4683     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4684     Ii = i + rstart;
4685     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4686     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4687   }
4688   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4689   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4690   PetscFunctionReturn(PETSC_SUCCESS);
4691 }
4692 
4693 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
4694 {
4695   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;
4696 
4697   PetscFunctionBegin;
4698   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4699   PetscCall(PetscFree(merge->id_r));
4700   PetscCall(PetscFree(merge->len_s));
4701   PetscCall(PetscFree(merge->len_r));
4702   PetscCall(PetscFree(merge->bi));
4703   PetscCall(PetscFree(merge->bj));
4704   PetscCall(PetscFree(merge->buf_ri[0]));
4705   PetscCall(PetscFree(merge->buf_ri));
4706   PetscCall(PetscFree(merge->buf_rj[0]));
4707   PetscCall(PetscFree(merge->buf_rj));
4708   PetscCall(PetscFree(merge->coi));
4709   PetscCall(PetscFree(merge->coj));
4710   PetscCall(PetscFree(merge->owners_co));
4711   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4712   PetscCall(PetscFree(merge));
4713   PetscFunctionReturn(PETSC_SUCCESS);
4714 }
4715 
4716 #include <../src/mat/utils/freespace.h>
4717 #include <petscbt.h>
4718 
4719 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4720 {
4721   MPI_Comm             comm;
4722   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4723   PetscMPIInt          size, rank, taga, *len_s;
4724   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4725   PetscMPIInt          proc, k;
4726   PetscInt           **buf_ri, **buf_rj;
4727   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4728   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4729   MPI_Request         *s_waits, *r_waits;
4730   MPI_Status          *status;
4731   const MatScalar     *aa, *a_a;
4732   MatScalar          **abuf_r, *ba_i;
4733   Mat_Merge_SeqsToMPI *merge;
4734   PetscContainer       container;
4735 
4736   PetscFunctionBegin;
4737   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4738   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4739 
4740   PetscCallMPI(MPI_Comm_size(comm, &size));
4741   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4742 
4743   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4744   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4745   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4746   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4747   aa = a_a;
4748 
4749   bi     = merge->bi;
4750   bj     = merge->bj;
4751   buf_ri = merge->buf_ri;
4752   buf_rj = merge->buf_rj;
4753 
4754   PetscCall(PetscMalloc1(size, &status));
4755   owners = merge->rowmap->range;
4756   len_s  = merge->len_s;
4757 
4758   /* send and recv matrix values */
4759   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4760   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4761 
4762   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4763   for (proc = 0, k = 0; proc < size; proc++) {
4764     if (!len_s[proc]) continue;
4765     i = owners[proc];
4766     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4767     k++;
4768   }
4769 
4770   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4771   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4772   PetscCall(PetscFree(status));
4773 
4774   PetscCall(PetscFree(s_waits));
4775   PetscCall(PetscFree(r_waits));
4776 
4777   /* insert mat values of mpimat */
4778   PetscCall(PetscMalloc1(N, &ba_i));
4779   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4780 
4781   for (k = 0; k < merge->nrecv; k++) {
4782     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4783     nrows       = *buf_ri_k[k];
4784     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4785     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4786   }
4787 
4788   /* set values of ba */
4789   m = merge->rowmap->n;
4790   for (i = 0; i < m; i++) {
4791     arow = owners[rank] + i;
4792     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4793     bnzi = bi[i + 1] - bi[i];
4794     PetscCall(PetscArrayzero(ba_i, bnzi));
4795 
4796     /* add local non-zero vals of this proc's seqmat into ba */
4797     anzi   = ai[arow + 1] - ai[arow];
4798     aj     = a->j + ai[arow];
4799     aa     = a_a + ai[arow];
4800     nextaj = 0;
4801     for (j = 0; nextaj < anzi; j++) {
4802       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4803         ba_i[j] += aa[nextaj++];
4804       }
4805     }
4806 
4807     /* add received vals into ba */
4808     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4809       /* i-th row */
4810       if (i == *nextrow[k]) {
4811         anzi   = *(nextai[k] + 1) - *nextai[k];
4812         aj     = buf_rj[k] + *nextai[k];
4813         aa     = abuf_r[k] + *nextai[k];
4814         nextaj = 0;
4815         for (j = 0; nextaj < anzi; j++) {
4816           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4817             ba_i[j] += aa[nextaj++];
4818           }
4819         }
4820         nextrow[k]++;
4821         nextai[k]++;
4822       }
4823     }
4824     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4825   }
4826   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4827   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4828   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4829 
4830   PetscCall(PetscFree(abuf_r[0]));
4831   PetscCall(PetscFree(abuf_r));
4832   PetscCall(PetscFree(ba_i));
4833   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4834   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4835   PetscFunctionReturn(PETSC_SUCCESS);
4836 }
4837 
4838 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4839 {
4840   Mat                  B_mpi;
4841   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4842   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4843   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4844   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4845   PetscInt             len, *dnz, *onz, bs, cbs;
4846   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4847   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4848   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4849   MPI_Status          *status;
4850   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4851   PetscBT              lnkbt;
4852   Mat_Merge_SeqsToMPI *merge;
4853   PetscContainer       container;
4854 
4855   PetscFunctionBegin;
4856   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4857 
4858   /* make sure it is a PETSc comm */
4859   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4860   PetscCallMPI(MPI_Comm_size(comm, &size));
4861   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4862 
4863   PetscCall(PetscNew(&merge));
4864   PetscCall(PetscMalloc1(size, &status));
4865 
4866   /* determine row ownership */
4867   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4868   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4869   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4870   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4871   PetscCall(PetscLayoutSetUp(merge->rowmap));
4872   PetscCall(PetscMalloc1(size, &len_si));
4873   PetscCall(PetscMalloc1(size, &merge->len_s));
4874 
4875   m      = merge->rowmap->n;
4876   owners = merge->rowmap->range;
4877 
4878   /* determine the number of messages to send, their lengths */
4879   len_s = merge->len_s;
4880 
4881   len          = 0; /* length of buf_si[] */
4882   merge->nsend = 0;
4883   for (PetscMPIInt proc = 0; proc < size; proc++) {
4884     len_si[proc] = 0;
4885     if (proc == rank) {
4886       len_s[proc] = 0;
4887     } else {
4888       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4889       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4890     }
4891     if (len_s[proc]) {
4892       merge->nsend++;
4893       nrows = 0;
4894       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4895         if (ai[i + 1] > ai[i]) nrows++;
4896       }
4897       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4898       len += len_si[proc];
4899     }
4900   }
4901 
4902   /* determine the number and length of messages to receive for ij-structure */
4903   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4904   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4905 
4906   /* post the Irecv of j-structure */
4907   PetscCall(PetscCommGetNewTag(comm, &tagj));
4908   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4909 
4910   /* post the Isend of j-structure */
4911   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4912 
4913   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4914     if (!len_s[proc]) continue;
4915     i = owners[proc];
4916     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4917     k++;
4918   }
4919 
4920   /* receives and sends of j-structure are complete */
4921   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4922   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4923 
4924   /* send and recv i-structure */
4925   PetscCall(PetscCommGetNewTag(comm, &tagi));
4926   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4927 
4928   PetscCall(PetscMalloc1(len + 1, &buf_s));
4929   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4930   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4931     if (!len_s[proc]) continue;
4932     /* form outgoing message for i-structure:
4933          buf_si[0]:                 nrows to be sent
4934                [1:nrows]:           row index (global)
4935                [nrows+1:2*nrows+1]: i-structure index
4936     */
4937     nrows       = len_si[proc] / 2 - 1;
4938     buf_si_i    = buf_si + nrows + 1;
4939     buf_si[0]   = nrows;
4940     buf_si_i[0] = 0;
4941     nrows       = 0;
4942     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4943       anzi = ai[i + 1] - ai[i];
4944       if (anzi) {
4945         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4946         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4947         nrows++;
4948       }
4949     }
4950     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4951     k++;
4952     buf_si += len_si[proc];
4953   }
4954 
4955   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4956   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4957 
4958   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4959   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4960 
4961   PetscCall(PetscFree(len_si));
4962   PetscCall(PetscFree(len_ri));
4963   PetscCall(PetscFree(rj_waits));
4964   PetscCall(PetscFree2(si_waits, sj_waits));
4965   PetscCall(PetscFree(ri_waits));
4966   PetscCall(PetscFree(buf_s));
4967   PetscCall(PetscFree(status));
4968 
4969   /* compute a local seq matrix in each processor */
4970   /* allocate bi array and free space for accumulating nonzero column info */
4971   PetscCall(PetscMalloc1(m + 1, &bi));
4972   bi[0] = 0;
4973 
4974   /* create and initialize a linked list */
4975   nlnk = N + 1;
4976   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4977 
4978   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4979   len = ai[owners[rank + 1]] - ai[owners[rank]];
4980   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4981 
4982   current_space = free_space;
4983 
4984   /* determine symbolic info for each local row */
4985   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4986 
4987   for (k = 0; k < merge->nrecv; k++) {
4988     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4989     nrows       = *buf_ri_k[k];
4990     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4991     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4992   }
4993 
4994   MatPreallocateBegin(comm, m, n, dnz, onz);
4995   len = 0;
4996   for (i = 0; i < m; i++) {
4997     bnzi = 0;
4998     /* add local non-zero cols of this proc's seqmat into lnk */
4999     arow = owners[rank] + i;
5000     anzi = ai[arow + 1] - ai[arow];
5001     aj   = a->j + ai[arow];
5002     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5003     bnzi += nlnk;
5004     /* add received col data into lnk */
5005     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5006       if (i == *nextrow[k]) {            /* i-th row */
5007         anzi = *(nextai[k] + 1) - *nextai[k];
5008         aj   = buf_rj[k] + *nextai[k];
5009         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5010         bnzi += nlnk;
5011         nextrow[k]++;
5012         nextai[k]++;
5013       }
5014     }
5015     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5016 
5017     /* if free space is not available, make more free space */
5018     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5019     /* copy data into free space, then initialize lnk */
5020     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5021     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5022 
5023     current_space->array += bnzi;
5024     current_space->local_used += bnzi;
5025     current_space->local_remaining -= bnzi;
5026 
5027     bi[i + 1] = bi[i] + bnzi;
5028   }
5029 
5030   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5031 
5032   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5033   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5034   PetscCall(PetscLLDestroy(lnk, lnkbt));
5035 
5036   /* create symbolic parallel matrix B_mpi */
5037   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5038   PetscCall(MatCreate(comm, &B_mpi));
5039   if (n == PETSC_DECIDE) {
5040     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5041   } else {
5042     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5043   }
5044   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5045   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5046   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5047   MatPreallocateEnd(dnz, onz);
5048   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5049 
5050   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5051   B_mpi->assembled = PETSC_FALSE;
5052   merge->bi        = bi;
5053   merge->bj        = bj;
5054   merge->buf_ri    = buf_ri;
5055   merge->buf_rj    = buf_rj;
5056   merge->coi       = NULL;
5057   merge->coj       = NULL;
5058   merge->owners_co = NULL;
5059 
5060   PetscCall(PetscCommDestroy(&comm));
5061 
5062   /* attach the supporting struct to B_mpi for reuse */
5063   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5064   PetscCall(PetscContainerSetPointer(container, merge));
5065   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5066   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5067   PetscCall(PetscContainerDestroy(&container));
5068   *mpimat = B_mpi;
5069 
5070   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5071   PetscFunctionReturn(PETSC_SUCCESS);
5072 }
5073 
5074 /*@
5075   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5076   matrices from each processor
5077 
5078   Collective
5079 
5080   Input Parameters:
5081 + comm   - the communicators the parallel matrix will live on
5082 . seqmat - the input sequential matrices
5083 . m      - number of local rows (or `PETSC_DECIDE`)
5084 . n      - number of local columns (or `PETSC_DECIDE`)
5085 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5086 
5087   Output Parameter:
5088 . mpimat - the parallel matrix generated
5089 
5090   Level: advanced
5091 
5092   Note:
5093   The dimensions of the sequential matrix in each processor MUST be the same.
5094   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5095   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5096 
5097 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5098 @*/
5099 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5100 {
5101   PetscMPIInt size;
5102 
5103   PetscFunctionBegin;
5104   PetscCallMPI(MPI_Comm_size(comm, &size));
5105   if (size == 1) {
5106     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5107     if (scall == MAT_INITIAL_MATRIX) {
5108       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5109     } else {
5110       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5111     }
5112     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5113     PetscFunctionReturn(PETSC_SUCCESS);
5114   }
5115   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5116   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5117   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5118   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5119   PetscFunctionReturn(PETSC_SUCCESS);
5120 }
5121 
5122 /*@
5123   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5124 
5125   Not Collective
5126 
5127   Input Parameter:
5128 . A - the matrix
5129 
5130   Output Parameter:
5131 . A_loc - the local sequential matrix generated
5132 
5133   Level: developer
5134 
5135   Notes:
5136   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5137   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5138   `n` is the global column count obtained with `MatGetSize()`
5139 
5140   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5141 
5142   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5143 
5144   Destroy the matrix with `MatDestroy()`
5145 
5146 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5147 @*/
5148 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5149 {
5150   PetscBool mpi;
5151 
5152   PetscFunctionBegin;
5153   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5154   if (mpi) {
5155     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5156   } else {
5157     *A_loc = A;
5158     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5159   }
5160   PetscFunctionReturn(PETSC_SUCCESS);
5161 }
5162 
5163 /*@
5164   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5165 
5166   Not Collective
5167 
5168   Input Parameters:
5169 + A     - the matrix
5170 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5171 
5172   Output Parameter:
5173 . A_loc - the local sequential matrix generated
5174 
5175   Level: developer
5176 
5177   Notes:
5178   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5179   matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
5180   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5181 
5182   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5183 
5184   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5185   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5186   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5187   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5188 
5189 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5190 @*/
5191 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5192 {
5193   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5194   Mat_SeqAIJ        *mat, *a, *b;
5195   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5196   const PetscScalar *aa, *ba, *aav, *bav;
5197   PetscScalar       *ca, *cam;
5198   PetscMPIInt        size;
5199   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5200   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5201   PetscBool          match;
5202 
5203   PetscFunctionBegin;
5204   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5205   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5206   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5207   if (size == 1) {
5208     if (scall == MAT_INITIAL_MATRIX) {
5209       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5210       *A_loc = mpimat->A;
5211     } else if (scall == MAT_REUSE_MATRIX) {
5212       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5213     }
5214     PetscFunctionReturn(PETSC_SUCCESS);
5215   }
5216 
5217   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5218   a  = (Mat_SeqAIJ *)mpimat->A->data;
5219   b  = (Mat_SeqAIJ *)mpimat->B->data;
5220   ai = a->i;
5221   aj = a->j;
5222   bi = b->i;
5223   bj = b->j;
5224   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5225   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5226   aa = aav;
5227   ba = bav;
5228   if (scall == MAT_INITIAL_MATRIX) {
5229     PetscCall(PetscMalloc1(1 + am, &ci));
5230     ci[0] = 0;
5231     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5232     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5233     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5234     k = 0;
5235     for (i = 0; i < am; i++) {
5236       ncols_o = bi[i + 1] - bi[i];
5237       ncols_d = ai[i + 1] - ai[i];
5238       /* off-diagonal portion of A */
5239       for (jo = 0; jo < ncols_o; jo++) {
5240         col = cmap[*bj];
5241         if (col >= cstart) break;
5242         cj[k] = col;
5243         bj++;
5244         ca[k++] = *ba++;
5245       }
5246       /* diagonal portion of A */
5247       for (j = 0; j < ncols_d; j++) {
5248         cj[k]   = cstart + *aj++;
5249         ca[k++] = *aa++;
5250       }
5251       /* off-diagonal portion of A */
5252       for (j = jo; j < ncols_o; j++) {
5253         cj[k]   = cmap[*bj++];
5254         ca[k++] = *ba++;
5255       }
5256     }
5257     /* put together the new matrix */
5258     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5259     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5260     /* Since these are PETSc arrays, change flags to free them as necessary. */
5261     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5262     mat->free_a  = PETSC_TRUE;
5263     mat->free_ij = PETSC_TRUE;
5264     mat->nonew   = 0;
5265   } else if (scall == MAT_REUSE_MATRIX) {
5266     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5267     ci  = mat->i;
5268     cj  = mat->j;
5269     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5270     for (i = 0; i < am; i++) {
5271       /* off-diagonal portion of A */
5272       ncols_o = bi[i + 1] - bi[i];
5273       for (jo = 0; jo < ncols_o; jo++) {
5274         col = cmap[*bj];
5275         if (col >= cstart) break;
5276         *cam++ = *ba++;
5277         bj++;
5278       }
5279       /* diagonal portion of A */
5280       ncols_d = ai[i + 1] - ai[i];
5281       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5282       /* off-diagonal portion of A */
5283       for (j = jo; j < ncols_o; j++) {
5284         *cam++ = *ba++;
5285         bj++;
5286       }
5287     }
5288     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5289   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5290   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5291   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5292   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5293   PetscFunctionReturn(PETSC_SUCCESS);
5294 }
5295 
5296 /*@
5297   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5298   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5299 
5300   Not Collective
5301 
5302   Input Parameters:
5303 + A     - the matrix
5304 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5305 
5306   Output Parameters:
5307 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5308 - A_loc - the local sequential matrix generated
5309 
5310   Level: developer
5311 
5312   Note:
5313   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5314   part, then those associated with the off-diagonal part (in its local ordering)
5315 
5316 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5317 @*/
5318 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5319 {
5320   Mat             Ao, Ad;
5321   const PetscInt *cmap;
5322   PetscMPIInt     size;
5323   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5324 
5325   PetscFunctionBegin;
5326   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5327   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5328   if (size == 1) {
5329     if (scall == MAT_INITIAL_MATRIX) {
5330       PetscCall(PetscObjectReference((PetscObject)Ad));
5331       *A_loc = Ad;
5332     } else if (scall == MAT_REUSE_MATRIX) {
5333       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5334     }
5335     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5336     PetscFunctionReturn(PETSC_SUCCESS);
5337   }
5338   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5339   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5340   if (f) {
5341     PetscCall((*f)(A, scall, glob, A_loc));
5342   } else {
5343     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5344     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5345     Mat_SeqAIJ        *c;
5346     PetscInt          *ai = a->i, *aj = a->j;
5347     PetscInt          *bi = b->i, *bj = b->j;
5348     PetscInt          *ci, *cj;
5349     const PetscScalar *aa, *ba;
5350     PetscScalar       *ca;
5351     PetscInt           i, j, am, dn, on;
5352 
5353     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5354     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5355     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5356     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5357     if (scall == MAT_INITIAL_MATRIX) {
5358       PetscInt k;
5359       PetscCall(PetscMalloc1(1 + am, &ci));
5360       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5361       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5362       ci[0] = 0;
5363       for (i = 0, k = 0; i < am; i++) {
5364         const PetscInt ncols_o = bi[i + 1] - bi[i];
5365         const PetscInt ncols_d = ai[i + 1] - ai[i];
5366         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5367         /* diagonal portion of A */
5368         for (j = 0; j < ncols_d; j++, k++) {
5369           cj[k] = *aj++;
5370           ca[k] = *aa++;
5371         }
5372         /* off-diagonal portion of A */
5373         for (j = 0; j < ncols_o; j++, k++) {
5374           cj[k] = dn + *bj++;
5375           ca[k] = *ba++;
5376         }
5377       }
5378       /* put together the new matrix */
5379       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5380       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5381       /* Since these are PETSc arrays, change flags to free them as necessary. */
5382       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5383       c->free_a  = PETSC_TRUE;
5384       c->free_ij = PETSC_TRUE;
5385       c->nonew   = 0;
5386       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5387     } else if (scall == MAT_REUSE_MATRIX) {
5388       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5389       for (i = 0; i < am; i++) {
5390         const PetscInt ncols_d = ai[i + 1] - ai[i];
5391         const PetscInt ncols_o = bi[i + 1] - bi[i];
5392         /* diagonal portion of A */
5393         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5394         /* off-diagonal portion of A */
5395         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5396       }
5397       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5398     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5399     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5400     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5401     if (glob) {
5402       PetscInt cst, *gidx;
5403 
5404       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5405       PetscCall(PetscMalloc1(dn + on, &gidx));
5406       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5407       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5408       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5409     }
5410   }
5411   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5412   PetscFunctionReturn(PETSC_SUCCESS);
5413 }
5414 
5415 /*@C
5416   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5417 
5418   Not Collective
5419 
5420   Input Parameters:
5421 + A     - the matrix
5422 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5423 . row   - index set of rows to extract (or `NULL`)
5424 - col   - index set of columns to extract (or `NULL`)
5425 
5426   Output Parameter:
5427 . A_loc - the local sequential matrix generated
5428 
5429   Level: developer
5430 
5431 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5432 @*/
5433 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5434 {
5435   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5436   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5437   IS          isrowa, iscola;
5438   Mat        *aloc;
5439   PetscBool   match;
5440 
5441   PetscFunctionBegin;
5442   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5443   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5444   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5445   if (!row) {
5446     start = A->rmap->rstart;
5447     end   = A->rmap->rend;
5448     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5449   } else {
5450     isrowa = *row;
5451   }
5452   if (!col) {
5453     start = A->cmap->rstart;
5454     cmap  = a->garray;
5455     nzA   = a->A->cmap->n;
5456     nzB   = a->B->cmap->n;
5457     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5458     ncols = 0;
5459     for (i = 0; i < nzB; i++) {
5460       if (cmap[i] < start) idx[ncols++] = cmap[i];
5461       else break;
5462     }
5463     imark = i;
5464     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5465     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5466     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5467   } else {
5468     iscola = *col;
5469   }
5470   if (scall != MAT_INITIAL_MATRIX) {
5471     PetscCall(PetscMalloc1(1, &aloc));
5472     aloc[0] = *A_loc;
5473   }
5474   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5475   if (!col) { /* attach global id of condensed columns */
5476     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5477   }
5478   *A_loc = aloc[0];
5479   PetscCall(PetscFree(aloc));
5480   if (!row) PetscCall(ISDestroy(&isrowa));
5481   if (!col) PetscCall(ISDestroy(&iscola));
5482   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5483   PetscFunctionReturn(PETSC_SUCCESS);
5484 }
5485 
5486 /*
5487  * Create a sequential AIJ matrix based on row indices. All columns of a row are extracted once the row is matched.
5488  * A row could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5489  * on a global size.
5490  * */
/*
 * Parameters:
 *   P     - input matrix; its data is accessed as Mat_MPIAIJ (diagonal block p->A, off-diagonal block p->B)
 *   rows  - global row indices of P to fetch; each is resolved to (owner rank, local index) through P->rmap
 *   P_oth - output: new sequential AIJ matrix with one row per entry of rows, storing global column indices;
 *           the two star forests used for the transfer are composed with it as "diagsf" and "offdiagsf"
 *
 * Side effect: pd->j and po->j are converted to global indices in place while they are being sent and are
 * converted back to local indices before the routine returns.
 */
5491 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5492 {
5493   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5494   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5495   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5496   PetscMPIInt            owner;
5497   PetscSFNode           *iremote, *oiremote;
5498   const PetscInt        *lrowindices;
5499   PetscSF                sf, osf;
5500   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5501   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5502   MPI_Comm               comm;
5503   ISLocalToGlobalMapping mapping;
5504   const PetscScalar     *pd_a, *po_a;
5505 
5506   PetscFunctionBegin;
5507   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5508   /* plocalsize is the number of roots
5509    * nrows is the number of leaves
5510    * */
5511   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5512   PetscCall(ISGetLocalSize(rows, &nrows));
5513   PetscCall(PetscCalloc1(nrows, &iremote));
5514   PetscCall(ISGetIndices(rows, &lrowindices));
5515   for (i = 0; i < nrows; i++) {
5516     /* Find a remote index and an owner for a row
5517      * The row could be local or remote
5518      * */
5519     owner = 0;
5520     lidx  = 0;
5521     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5522     iremote[i].index = lidx;
5523     iremote[i].rank  = owner;
5524   }
5525   /* Create SF to communicate how many nonzero columns for each row */
5526   PetscCall(PetscSFCreate(comm, &sf));
5527   /* SF will figure out the number of nonzero columns for each row, and their
5528    * offsets
5529    * */
       /* leaves are numbered contiguously (ilocal == NULL); iremote is handed over to the SF */
5530   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5531   PetscCall(PetscSFSetFromOptions(sf));
5532   PetscCall(PetscSFSetUp(sf));
5533 
       /* root data is stored as interleaved pairs: entry 2*i is for the diagonal block, entry 2*i+1 for the off-diagonal block */
5534   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5535   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5536   PetscCall(PetscCalloc1(nrows, &pnnz));
5537   roffsets[0] = 0;
5538   roffsets[1] = 0;
5539   for (i = 0; i < plocalsize; i++) {
5540     /* diagonal */
5541     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5542     /* off-diagonal */
5543     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5544     /* compute offsets so that we relative location for each row */
5545     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5546     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5547   }
5548   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5549   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5550   /* 'r' means root, and 'l' means leaf */
       /* each (diagonal, off-diagonal) pair travels as one MPIU_2INT unit; the two broadcasts are in flight together */
5551   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5552   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5553   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5554   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5555   PetscCall(PetscSFDestroy(&sf));
5556   PetscCall(PetscFree(roffsets));
5557   PetscCall(PetscFree(nrcols));
       /* first pass over the requested rows: per-row nonzero count, longest row, and total entries per block */
5558   dntotalcols = 0;
5559   ontotalcols = 0;
5560   ncol        = 0;
5561   for (i = 0; i < nrows; i++) {
5562     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5563     ncol    = PetscMax(pnnz[i], ncol);
5564     /* diagonal */
5565     dntotalcols += nlcols[i * 2 + 0];
5566     /* off-diagonal */
5567     ontotalcols += nlcols[i * 2 + 1];
5568   }
5569   /* We do not need to figure the right number of columns
5570    * since all the calculations will be done by going through the raw data
5571    * */
       /* the column count passed here is just the longest row length (ncol) */
5572   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5573   PetscCall(MatSetUp(*P_oth));
5574   PetscCall(PetscFree(pnnz));
5575   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5576   /* diagonal */
5577   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5578   /* off-diagonal */
5579   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5580   /* diagonal */
5581   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5582   /* off-diagonal */
5583   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
       /* second pass: the counters are reset and reused as running cursors while the entry-level graphs are built;
          ntotalcols is the position in P_oth's arrays, so within a row the diagonal entries precede the off-diagonal ones */
5584   dntotalcols = 0;
5585   ontotalcols = 0;
5586   ntotalcols  = 0;
5587   for (i = 0; i < nrows; i++) {
5588     owner = 0;
5589     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5590     /* Set iremote for diag matrix */
5591     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5592       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5593       iremote[dntotalcols].rank  = owner;
5594       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5595       ilocal[dntotalcols++] = ntotalcols++;
5596     }
5597     /* off-diagonal */
5598     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5599       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5600       oiremote[ontotalcols].rank  = owner;
5601       oilocal[ontotalcols++]      = ntotalcols++;
5602     }
5603   }
5604   PetscCall(ISRestoreIndices(rows, &lrowindices));
5605   PetscCall(PetscFree(loffsets));
5606   PetscCall(PetscFree(nlcols));
5607   PetscCall(PetscSFCreate(comm, &sf));
5608   /* P serves as roots and P_oth is leaves
5609    * Diag matrix
5610    * */
       /* one root per stored entry of the local diagonal block (pd->i[plocalsize] of them) */
5611   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5612   PetscCall(PetscSFSetFromOptions(sf));
5613   PetscCall(PetscSFSetUp(sf));
5614 
5615   PetscCall(PetscSFCreate(comm, &osf));
5616   /* off-diagonal */
5617   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5618   PetscCall(PetscSFSetFromOptions(osf));
5619   PetscCall(PetscSFSetUp(osf));
5620   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5621   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5622   /* operate on the matrix internal data to save memory */
       /* the value broadcasts are started here and only completed after the index broadcasts below */
5623   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5624   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5625   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5626   /* Convert to global indices for diag matrix */
5627   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5628   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5629   /* We want P_oth store global indices */
5630   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5631   /* Use memory scalable approach */
5632   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
       /* po->j is translated to global indices in place (input and output arrays are the same) */
5633   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5634   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5635   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5636   /* Convert back to local indices */
       /* pd->j is restored only after the broadcast that reads it has ended */
5637   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5638   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
       /* po->j is mapped back to local indices in place; nout must equal the number of entries, otherwise an error is raised */
5639   nout = 0;
5640   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5641   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5642   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5643   /* Exchange values */
5644   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5645   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5646   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5647   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5648   /* Stop PETSc from shrinking memory */
       /* the arrays were filled directly, so every row's used length is set to its allocated length */
5649   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5650   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5651   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5652   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5653   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5654   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
       /* the handles held in sf/osf are released here, after both have been composed with P_oth */
5655   PetscCall(PetscSFDestroy(&sf));
5656   PetscCall(PetscSFDestroy(&osf));
5657   PetscFunctionReturn(PETSC_SUCCESS);
5658 }
5659 
5660 /*
5661  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5662  * This supports MPIAIJ and MAIJ
5663  * */
5664 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5665 {
5666   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5667   Mat_SeqAIJ *p_oth;
5668   IS          rows, map;
5669   PetscHMapI  hamp;
5670   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5671   MPI_Comm    comm;
5672   PetscSF     sf, osf;
5673   PetscBool   has;
5674 
5675   PetscFunctionBegin;
5676   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5677   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5678   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5679    *  and then create a submatrix (that often is an overlapping matrix)
5680    * */
5681   if (reuse == MAT_INITIAL_MATRIX) {
5682     /* Use a hash table to figure out unique keys */
5683     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5684     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5685     count = 0;
5686     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5687     for (i = 0; i < a->B->cmap->n; i++) {
5688       key = a->garray[i] / dof;
5689       PetscCall(PetscHMapIHas(hamp, key, &has));
5690       if (!has) {
5691         mapping[i] = count;
5692         PetscCall(PetscHMapISet(hamp, key, count++));
5693       } else {
5694         /* Current 'i' has the same value the previous step */
5695         mapping[i] = count - 1;
5696       }
5697     }
5698     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5699     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5700     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5701     PetscCall(PetscCalloc1(htsize, &rowindices));
5702     off = 0;
5703     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5704     PetscCall(PetscHMapIDestroy(&hamp));
5705     PetscCall(PetscSortInt(htsize, rowindices));
5706     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5707     /* In case, the matrix was already created but users want to recreate the matrix */
5708     PetscCall(MatDestroy(P_oth));
5709     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5710     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5711     PetscCall(ISDestroy(&map));
5712     PetscCall(ISDestroy(&rows));
5713   } else if (reuse == MAT_REUSE_MATRIX) {
5714     /* If matrix was already created, we simply update values using SF objects
5715      * that as attached to the matrix earlier.
5716      */
5717     const PetscScalar *pd_a, *po_a;
5718 
5719     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5720     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5721     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5722     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5723     /* Update values in place */
5724     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5725     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5726     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5727     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5728     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5729     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5730     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5731     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5732   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5733   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5734   PetscFunctionReturn(PETSC_SUCCESS);
5735 }
5736 
5737 /*@C
5738   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5739 
5740   Collective
5741 
5742   Input Parameters:
5743 + A     - the first matrix in `MATMPIAIJ` format
5744 . B     - the second matrix in `MATMPIAIJ` format
5745 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5746 
5747   Output Parameters:
5748 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5749 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5750 - B_seq - the sequential matrix generated
5751 
5752   Level: developer
5753 
5754 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5755 @*/
5756 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5757 {
5758   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5759   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5760   IS          isrowb, iscolb;
5761   Mat        *bseq = NULL;
5762 
5763   PetscFunctionBegin;
5764   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5765              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5766   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5767 
5768   if (scall == MAT_INITIAL_MATRIX) {
5769     start = A->cmap->rstart;
5770     cmap  = a->garray;
5771     nzA   = a->A->cmap->n;
5772     nzB   = a->B->cmap->n;
5773     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5774     ncols = 0;
5775     for (i = 0; i < nzB; i++) { /* row < local row index */
5776       if (cmap[i] < start) idx[ncols++] = cmap[i];
5777       else break;
5778     }
5779     imark = i;
5780     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5781     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5782     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5783     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5784   } else {
5785     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5786     isrowb = *rowb;
5787     iscolb = *colb;
5788     PetscCall(PetscMalloc1(1, &bseq));
5789     bseq[0] = *B_seq;
5790   }
5791   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5792   *B_seq = bseq[0];
5793   PetscCall(PetscFree(bseq));
5794   if (!rowb) {
5795     PetscCall(ISDestroy(&isrowb));
5796   } else {
5797     *rowb = isrowb;
5798   }
5799   if (!colb) {
5800     PetscCall(ISDestroy(&iscolb));
5801   } else {
5802     *colb = iscolb;
5803   }
5804   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5805   PetscFunctionReturn(PETSC_SUCCESS);
5806 }
5807 
5808 /*
5809     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5810     of the OFF-DIAGONAL portion of local A
5811 
5812     Collective
5813 
5814    Input Parameters:
5815 +    A,B - the matrices in `MATMPIAIJ` format
5816 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5817 
5818    Output Parameter:
5819 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5820 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5821 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5822 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5823 
5824     Developer Note:
5825     This directly accesses information inside the VecScatter associated with the matrix-vector product
5826      for this matrix. This is not desirable..
5827 
5828     Level: developer
5829 
5830 */
5831 
5832 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5833 {
5834   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5835   VecScatter         ctx;
5836   MPI_Comm           comm;
5837   const PetscMPIInt *rprocs, *sprocs;
5838   PetscMPIInt        nrecvs, nsends;
5839   const PetscInt    *srow, *rstarts, *sstarts;
5840   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5841   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5842   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5843   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5844   PetscMPIInt        size, tag, rank, nreqs;
5845 
5846   PetscFunctionBegin;
5847   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5848   PetscCallMPI(MPI_Comm_size(comm, &size));
5849 
5850   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5851              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5852   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5853   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5854 
5855   if (size == 1) {
5856     startsj_s = NULL;
5857     bufa_ptr  = NULL;
5858     *B_oth    = NULL;
5859     PetscFunctionReturn(PETSC_SUCCESS);
5860   }
5861 
5862   ctx = a->Mvctx;
5863   tag = ((PetscObject)ctx)->tag;
5864 
5865   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5866   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5867   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5868   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5869   PetscCall(PetscMalloc1(nreqs, &reqs));
5870   rwaits = reqs;
5871   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5872 
5873   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5874   if (scall == MAT_INITIAL_MATRIX) {
5875     /* i-array */
5876     /*  post receives */
5877     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5878     for (i = 0; i < nrecvs; i++) {
5879       rowlen = rvalues + rstarts[i] * rbs;
5880       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5881       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5882     }
5883 
5884     /* pack the outgoing message */
5885     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5886 
5887     sstartsj[0] = 0;
5888     rstartsj[0] = 0;
5889     len         = 0; /* total length of j or a array to be sent */
5890     if (nsends) {
5891       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5892       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5893     }
5894     for (i = 0; i < nsends; i++) {
5895       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5896       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5897       for (j = 0; j < nrows; j++) {
5898         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5899         for (l = 0; l < sbs; l++) {
5900           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5901 
5902           rowlen[j * sbs + l] = ncols;
5903 
5904           len += ncols;
5905           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5906         }
5907         k++;
5908       }
5909       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5910 
5911       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5912     }
5913     /* recvs and sends of i-array are completed */
5914     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5915     PetscCall(PetscFree(svalues));
5916 
5917     /* allocate buffers for sending j and a arrays */
5918     PetscCall(PetscMalloc1(len + 1, &bufj));
5919     PetscCall(PetscMalloc1(len + 1, &bufa));
5920 
5921     /* create i-array of B_oth */
5922     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5923 
5924     b_othi[0] = 0;
5925     len       = 0; /* total length of j or a array to be received */
5926     k         = 0;
5927     for (i = 0; i < nrecvs; i++) {
5928       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5929       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5930       for (j = 0; j < nrows; j++) {
5931         b_othi[k + 1] = b_othi[k] + rowlen[j];
5932         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5933         k++;
5934       }
5935       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5936     }
5937     PetscCall(PetscFree(rvalues));
5938 
5939     /* allocate space for j and a arrays of B_oth */
5940     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5941     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5942 
5943     /* j-array */
5944     /*  post receives of j-array */
5945     for (i = 0; i < nrecvs; i++) {
5946       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5947       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5948     }
5949 
5950     /* pack the outgoing message j-array */
5951     if (nsends) k = sstarts[0];
5952     for (i = 0; i < nsends; i++) {
5953       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5954       bufJ  = bufj + sstartsj[i];
5955       for (j = 0; j < nrows; j++) {
5956         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5957         for (ll = 0; ll < sbs; ll++) {
5958           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5959           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5960           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5961         }
5962       }
5963       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5964     }
5965 
5966     /* recvs and sends of j-array are completed */
5967     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5968   } else if (scall == MAT_REUSE_MATRIX) {
5969     sstartsj = *startsj_s;
5970     rstartsj = *startsj_r;
5971     bufa     = *bufa_ptr;
5972     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5973   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5974 
5975   /* a-array */
5976   /*  post receives of a-array */
5977   for (i = 0; i < nrecvs; i++) {
5978     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5979     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5980   }
5981 
5982   /* pack the outgoing message a-array */
5983   if (nsends) k = sstarts[0];
5984   for (i = 0; i < nsends; i++) {
5985     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5986     bufA  = bufa + sstartsj[i];
5987     for (j = 0; j < nrows; j++) {
5988       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5989       for (ll = 0; ll < sbs; ll++) {
5990         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5991         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5992         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5993       }
5994     }
5995     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5996   }
5997   /* recvs and sends of a-array are completed */
5998   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5999   PetscCall(PetscFree(reqs));
6000 
6001   if (scall == MAT_INITIAL_MATRIX) {
6002     Mat_SeqAIJ *b_oth;
6003 
6004     /* put together the new matrix */
6005     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6006 
6007     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6008     /* Since these are PETSc arrays, change flags to free them as necessary. */
6009     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6010     b_oth->free_a  = PETSC_TRUE;
6011     b_oth->free_ij = PETSC_TRUE;
6012     b_oth->nonew   = 0;
6013 
6014     PetscCall(PetscFree(bufj));
6015     if (!startsj_s || !bufa_ptr) {
6016       PetscCall(PetscFree2(sstartsj, rstartsj));
6017       PetscCall(PetscFree(bufa_ptr));
6018     } else {
6019       *startsj_s = sstartsj;
6020       *startsj_r = rstartsj;
6021       *bufa_ptr  = bufa;
6022     }
6023   } else if (scall == MAT_REUSE_MATRIX) {
6024     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6025   }
6026 
6027   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6028   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6029   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6030   PetscFunctionReturn(PETSC_SUCCESS);
6031 }
6032 
6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6036 #if defined(PETSC_HAVE_MKL_SPARSE)
6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6038 #endif
6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6041 #if defined(PETSC_HAVE_ELEMENTAL)
6042 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6043 #endif
6044 #if defined(PETSC_HAVE_SCALAPACK)
6045 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6046 #endif
6047 #if defined(PETSC_HAVE_HYPRE)
6048 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6049 #endif
6050 #if defined(PETSC_HAVE_CUDA)
6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6052 #endif
6053 #if defined(PETSC_HAVE_HIP)
6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6055 #endif
6056 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6058 #endif
6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6060 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6061 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6062 
6063 /*
6064     Computes (B'*A')' since computing B*A directly is untenable
6065 
6066                n                       p                          p
6067         [             ]       [             ]         [                 ]
6068       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6069         [             ]       [             ]         [                 ]
6070 
6071 */
6072 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6073 {
6074   Mat At, Bt, Ct;
6075 
6076   PetscFunctionBegin;
6077   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6078   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6079   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6080   PetscCall(MatDestroy(&At));
6081   PetscCall(MatDestroy(&Bt));
6082   PetscCall(MatTransposeSetPrecursor(Ct, C));
6083   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6084   PetscCall(MatDestroy(&Ct));
6085   PetscFunctionReturn(PETSC_SUCCESS);
6086 }
6087 
6088 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6089 {
6090   PetscBool cisdense;
6091 
6092   PetscFunctionBegin;
6093   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6094   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6095   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6096   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6097   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6098   PetscCall(MatSetUp(C));
6099 
6100   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6101   PetscFunctionReturn(PETSC_SUCCESS);
6102 }
6103 
6104 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6105 {
6106   Mat_Product *product = C->product;
6107   Mat          A = product->A, B = product->B;
6108 
6109   PetscFunctionBegin;
6110   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6111              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6112   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6113   C->ops->productsymbolic = MatProductSymbolic_AB;
6114   PetscFunctionReturn(PETSC_SUCCESS);
6115 }
6116 
6117 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6118 {
6119   Mat_Product *product = C->product;
6120 
6121   PetscFunctionBegin;
6122   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6123   PetscFunctionReturn(PETSC_SUCCESS);
6124 }
6125 
6126 /*
6127    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6128 
6129   Input Parameters:
6130 
6131     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6132     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6133 
6134     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6135 
6136     For Set1, j1[] contains column indices of the nonzeros.
6137     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6138     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6139     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6140 
6141     Similar for Set2.
6142 
6143     This routine merges the two sets of nonzeros row by row and removes repeats.
6144 
6145   Output Parameters: (memory is allocated by the caller)
6146 
6147     i[],j[]: the CSR of the merged matrix, which has m rows.
6148     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6149     imap2[]: similar to imap1[], but for Set2.
6150     Note we order nonzeros row-by-row and from left to right.
6151 */
6152 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6153 {
6154   PetscInt   r, m; /* Row index of mat */
6155   PetscCount t, t1, t2, b1, e1, b2, e2;
6156 
6157   PetscFunctionBegin;
6158   PetscCall(MatGetLocalSize(mat, &m, NULL));
6159   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6160   i[0]        = 0;
6161   for (r = 0; r < m; r++) { /* Do row by row merging */
6162     b1 = rowBegin1[r];
6163     e1 = rowEnd1[r];
6164     b2 = rowBegin2[r];
6165     e2 = rowEnd2[r];
6166     while (b1 < e1 && b2 < e2) {
6167       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6168         j[t]      = j1[b1];
6169         imap1[t1] = t;
6170         imap2[t2] = t;
6171         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6172         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6173         t1++;
6174         t2++;
6175         t++;
6176       } else if (j1[b1] < j2[b2]) {
6177         j[t]      = j1[b1];
6178         imap1[t1] = t;
6179         b1 += jmap1[t1 + 1] - jmap1[t1];
6180         t1++;
6181         t++;
6182       } else {
6183         j[t]      = j2[b2];
6184         imap2[t2] = t;
6185         b2 += jmap2[t2 + 1] - jmap2[t2];
6186         t2++;
6187         t++;
6188       }
6189     }
6190     /* Merge the remaining in either j1[] or j2[] */
6191     while (b1 < e1) {
6192       j[t]      = j1[b1];
6193       imap1[t1] = t;
6194       b1 += jmap1[t1 + 1] - jmap1[t1];
6195       t1++;
6196       t++;
6197     }
6198     while (b2 < e2) {
6199       j[t]      = j2[b2];
6200       imap2[t2] = t;
6201       b2 += jmap2[t2 + 1] - jmap2[t2];
6202       t2++;
6203       t++;
6204     }
6205     PetscCall(PetscIntCast(t, i + r + 1));
6206   }
6207   PetscFunctionReturn(PETSC_SUCCESS);
6208 }
6209 
6210 /*
6211   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6212 
6213   Input Parameters:
6214     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6215     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6216       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6217 
6218       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6219       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6220 
6221   Output Parameters:
6222     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6223     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6224       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6225       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6226 
6227     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6228       Atot: number of entries belonging to the diagonal block.
6229       Annz: number of unique nonzeros belonging to the diagonal block.
6230       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6231         repeats (i.e., same 'i,j' pair).
6232       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6233         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6234 
6235       Atot: number of entries belonging to the diagonal block
6236       Annz: number of unique nonzeros belonging to the diagonal block.
6237 
6238     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6239 
6240     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6241 */
6242 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6243 {
6244   PetscInt    cstart, cend, rstart, rend, row, col;
6245   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6246   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6247   PetscCount  k, m, p, q, r, s, mid;
6248   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6249 
6250   PetscFunctionBegin;
6251   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6252   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6253   m = rend - rstart;
6254 
6255   /* Skip negative rows */
6256   for (k = 0; k < n; k++)
6257     if (i[k] >= 0) break;
6258 
6259   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6260      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6261   */
6262   while (k < n) {
6263     row = i[k];
6264     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6265     for (s = k; s < n; s++)
6266       if (i[s] != row) break;
6267 
6268     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6269     for (p = k; p < s; p++) {
6270       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6271       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6272     }
6273     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6274     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6275     rowBegin[row - rstart] = k;
6276     rowMid[row - rstart]   = mid;
6277     rowEnd[row - rstart]   = s;
6278 
6279     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6280     Atot += mid - k;
6281     Btot += s - mid;
6282 
6283     /* Count unique nonzeros of this diag row */
6284     for (p = k; p < mid;) {
6285       col = j[p];
6286       do {
6287         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6288         p++;
6289       } while (p < mid && j[p] == col);
6290       Annz++;
6291     }
6292 
6293     /* Count unique nonzeros of this offdiag row */
6294     for (p = mid; p < s;) {
6295       col = j[p];
6296       do {
6297         p++;
6298       } while (p < s && j[p] == col);
6299       Bnnz++;
6300     }
6301     k = s;
6302   }
6303 
6304   /* Allocation according to Atot, Btot, Annz, Bnnz */
6305   PetscCall(PetscMalloc1(Atot, &Aperm));
6306   PetscCall(PetscMalloc1(Btot, &Bperm));
6307   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6308   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6309 
6310   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6311   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6312   for (r = 0; r < m; r++) {
6313     k   = rowBegin[r];
6314     mid = rowMid[r];
6315     s   = rowEnd[r];
6316     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6317     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6318     Atot += mid - k;
6319     Btot += s - mid;
6320 
6321     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6322     for (p = k; p < mid;) {
6323       col = j[p];
6324       q   = p;
6325       do {
6326         p++;
6327       } while (p < mid && j[p] == col);
6328       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6329       Annz++;
6330     }
6331 
6332     for (p = mid; p < s;) {
6333       col = j[p];
6334       q   = p;
6335       do {
6336         p++;
6337       } while (p < s && j[p] == col);
6338       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6339       Bnnz++;
6340     }
6341   }
6342   /* Output */
6343   *Aperm_ = Aperm;
6344   *Annz_  = Annz;
6345   *Atot_  = Atot;
6346   *Ajmap_ = Ajmap;
6347   *Bperm_ = Bperm;
6348   *Bnnz_  = Bnnz;
6349   *Btot_  = Btot;
6350   *Bjmap_ = Bjmap;
6351   PetscFunctionReturn(PETSC_SUCCESS);
6352 }
6353 
6354 /*
6355   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6356 
6357   Input Parameters:
6358     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6359     nnz:  number of unique nonzeros in the merged matrix
6360     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6361     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6362 
6363   Output Parameter: (memory is allocated by the caller)
6364     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6365 
6366   Example:
6367     nnz1 = 4
6368     nnz  = 6
6369     imap = [1,3,4,5]
6370     jmap = [0,3,5,6,7]
6371    then,
6372     jmap_new = [0,0,3,3,5,6,7]
6373 */
6374 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6375 {
6376   PetscCount k, p;
6377 
6378   PetscFunctionBegin;
6379   jmap_new[0] = 0;
6380   p           = nnz;                /* p loops over jmap_new[] backwards */
6381   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6382     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6383   }
6384   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6385   PetscFunctionReturn(PETSC_SUCCESS);
6386 }
6387 
6388 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6389 {
6390   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6391 
6392   PetscFunctionBegin;
6393   PetscCall(PetscSFDestroy(&coo->sf));
6394   PetscCall(PetscFree(coo->Aperm1));
6395   PetscCall(PetscFree(coo->Bperm1));
6396   PetscCall(PetscFree(coo->Ajmap1));
6397   PetscCall(PetscFree(coo->Bjmap1));
6398   PetscCall(PetscFree(coo->Aimap2));
6399   PetscCall(PetscFree(coo->Bimap2));
6400   PetscCall(PetscFree(coo->Aperm2));
6401   PetscCall(PetscFree(coo->Bperm2));
6402   PetscCall(PetscFree(coo->Ajmap2));
6403   PetscCall(PetscFree(coo->Bjmap2));
6404   PetscCall(PetscFree(coo->Cperm1));
6405   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6406   PetscCall(PetscFree(coo));
6407   PetscFunctionReturn(PETSC_SUCCESS);
6408 }
6409 
6410 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6411 {
6412   MPI_Comm             comm;
6413   PetscMPIInt          rank, size;
6414   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6415   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6416   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6417   PetscContainer       container;
6418   MatCOOStruct_MPIAIJ *coo;
6419 
6420   PetscFunctionBegin;
6421   PetscCall(PetscFree(mpiaij->garray));
6422   PetscCall(VecDestroy(&mpiaij->lvec));
6423 #if defined(PETSC_USE_CTABLE)
6424   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6425 #else
6426   PetscCall(PetscFree(mpiaij->colmap));
6427 #endif
6428   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6429   mat->assembled     = PETSC_FALSE;
6430   mat->was_assembled = PETSC_FALSE;
6431 
6432   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6433   PetscCallMPI(MPI_Comm_size(comm, &size));
6434   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6435   PetscCall(PetscLayoutSetUp(mat->rmap));
6436   PetscCall(PetscLayoutSetUp(mat->cmap));
6437   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6438   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6439   PetscCall(MatGetLocalSize(mat, &m, &n));
6440   PetscCall(MatGetSize(mat, &M, &N));
6441 
6442   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6443   /* entries come first, then local rows, then remote rows.                     */
6444   PetscCount n1 = coo_n, *perm1;
6445   PetscInt  *i1 = coo_i, *j1 = coo_j;
6446 
6447   PetscCall(PetscMalloc1(n1, &perm1));
6448   for (k = 0; k < n1; k++) perm1[k] = k;
6449 
6450   /* Manipulate indices so that entries with negative row or col indices will have smallest
6451      row indices, local entries will have greater but negative row indices, and remote entries
6452      will have positive row indices.
6453   */
6454   for (k = 0; k < n1; k++) {
6455     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6456     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6457     else {
6458       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6459       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6460     }
6461   }
6462 
6463   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6464   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6465 
6466   /* Advance k to the first entry we need to take care of */
6467   for (k = 0; k < n1; k++)
6468     if (i1[k] > PETSC_INT_MIN) break;
6469   PetscCount i1start = k;
6470 
6471   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6472   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6473 
6474   /*           Send remote rows to their owner                                  */
6475   /* Find which rows should be sent to which remote ranks*/
6476   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6477   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6478   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6479   const PetscInt *ranges;
6480   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6481 
6482   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6483   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6484   for (k = rem; k < n1;) {
6485     PetscMPIInt owner;
6486     PetscInt    firstRow, lastRow;
6487 
6488     /* Locate a row range */
6489     firstRow = i1[k]; /* first row of this owner */
6490     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6491     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6492 
6493     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6494     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6495 
6496     /* All entries in [k,p) belong to this remote owner */
6497     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6498       PetscMPIInt *sendto2;
6499       PetscInt    *nentries2;
6500       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6501 
6502       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6503       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6504       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6505       PetscCall(PetscFree2(sendto, nentries2));
6506       sendto   = sendto2;
6507       nentries = nentries2;
6508       maxNsend = maxNsend2;
6509     }
6510     sendto[nsend] = owner;
6511     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6512     nsend++;
6513     k = p;
6514   }
6515 
6516   /* Build 1st SF to know offsets on remote to send data */
6517   PetscSF      sf1;
6518   PetscInt     nroots = 1, nroots2 = 0;
6519   PetscInt     nleaves = nsend, nleaves2 = 0;
6520   PetscInt    *offsets;
6521   PetscSFNode *iremote;
6522 
6523   PetscCall(PetscSFCreate(comm, &sf1));
6524   PetscCall(PetscMalloc1(nsend, &iremote));
6525   PetscCall(PetscMalloc1(nsend, &offsets));
6526   for (k = 0; k < nsend; k++) {
6527     iremote[k].rank  = sendto[k];
6528     iremote[k].index = 0;
6529     nleaves2 += nentries[k];
6530     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6531   }
6532   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6533   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6534   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6535   PetscCall(PetscSFDestroy(&sf1));
6536   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6537 
6538   /* Build 2nd SF to send remote COOs to their owner */
6539   PetscSF sf2;
6540   nroots  = nroots2;
6541   nleaves = nleaves2;
6542   PetscCall(PetscSFCreate(comm, &sf2));
6543   PetscCall(PetscSFSetFromOptions(sf2));
6544   PetscCall(PetscMalloc1(nleaves, &iremote));
6545   p = 0;
6546   for (k = 0; k < nsend; k++) {
6547     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6548     for (q = 0; q < nentries[k]; q++, p++) {
6549       iremote[p].rank = sendto[k];
6550       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6551     }
6552   }
6553   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6554 
6555   /* Send the remote COOs to their owner */
6556   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6557   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6558   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6559   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6560   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6561   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6562   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6563   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6564   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6565   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6566   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6567 
6568   PetscCall(PetscFree(offsets));
6569   PetscCall(PetscFree2(sendto, nentries));
6570 
6571   /* Sort received COOs by row along with the permutation array     */
6572   for (k = 0; k < n2; k++) perm2[k] = k;
6573   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6574 
6575   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6576   PetscCount *Cperm1;
6577   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6578   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6579   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6580   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6581 
6582   /* Support for HYPRE matrices, kind of a hack.
6583      Swap min column with diagonal so that diagonal values will go first */
6584   PetscBool hypre;
6585   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6586   if (hypre) {
6587     PetscInt *minj;
6588     PetscBT   hasdiag;
6589 
6590     PetscCall(PetscBTCreate(m, &hasdiag));
6591     PetscCall(PetscMalloc1(m, &minj));
6592     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6593     for (k = i1start; k < rem; k++) {
6594       if (j1[k] < cstart || j1[k] >= cend) continue;
6595       const PetscInt rindex = i1[k] - rstart;
6596       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6597       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6598     }
6599     for (k = 0; k < n2; k++) {
6600       if (j2[k] < cstart || j2[k] >= cend) continue;
6601       const PetscInt rindex = i2[k] - rstart;
6602       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6603       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6604     }
6605     for (k = i1start; k < rem; k++) {
6606       const PetscInt rindex = i1[k] - rstart;
6607       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6608       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6609       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6610     }
6611     for (k = 0; k < n2; k++) {
6612       const PetscInt rindex = i2[k] - rstart;
6613       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6614       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6615       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6616     }
6617     PetscCall(PetscBTDestroy(&hasdiag));
6618     PetscCall(PetscFree(minj));
6619   }
6620 
6621   /* Split local COOs and received COOs into diag/offdiag portions */
6622   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6623   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6624   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6625   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6626   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6627   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6628 
6629   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6630   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6631   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6632   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6633 
6634   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6635   PetscInt *Ai, *Bi;
6636   PetscInt *Aj, *Bj;
6637 
6638   PetscCall(PetscMalloc1(m + 1, &Ai));
6639   PetscCall(PetscMalloc1(m + 1, &Bi));
6640   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6641   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6642 
6643   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6644   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6645   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6646   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6647   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6648 
6649   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6650   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6651 
6652   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6653   /* expect nonzeros in A/B most likely have local contributing entries        */
6654   PetscInt    Annz = Ai[m];
6655   PetscInt    Bnnz = Bi[m];
6656   PetscCount *Ajmap1_new, *Bjmap1_new;
6657 
6658   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6659   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6660 
6661   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6662   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6663 
6664   PetscCall(PetscFree(Aimap1));
6665   PetscCall(PetscFree(Ajmap1));
6666   PetscCall(PetscFree(Bimap1));
6667   PetscCall(PetscFree(Bjmap1));
6668   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6669   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6670   PetscCall(PetscFree(perm1));
6671   PetscCall(PetscFree3(i2, j2, perm2));
6672 
6673   Ajmap1 = Ajmap1_new;
6674   Bjmap1 = Bjmap1_new;
6675 
6676   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6677   if (Annz < Annz1 + Annz2) {
6678     PetscInt *Aj_new;
6679     PetscCall(PetscMalloc1(Annz, &Aj_new));
6680     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6681     PetscCall(PetscFree(Aj));
6682     Aj = Aj_new;
6683   }
6684 
6685   if (Bnnz < Bnnz1 + Bnnz2) {
6686     PetscInt *Bj_new;
6687     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6688     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6689     PetscCall(PetscFree(Bj));
6690     Bj = Bj_new;
6691   }
6692 
6693   /* Create new submatrices for on-process and off-process coupling                  */
6694   PetscScalar     *Aa, *Ba;
6695   MatType          rtype;
6696   Mat_SeqAIJ      *a, *b;
6697   PetscObjectState state;
6698   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6699   PetscCall(PetscCalloc1(Bnnz, &Ba));
6700   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6701   if (cstart) {
6702     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6703   }
6704 
6705   PetscCall(MatGetRootType_Private(mat, &rtype));
6706 
6707   MatSeqXAIJGetOptions_Private(mpiaij->A);
6708   PetscCall(MatDestroy(&mpiaij->A));
6709   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6710   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6711   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6712 
6713   MatSeqXAIJGetOptions_Private(mpiaij->B);
6714   PetscCall(MatDestroy(&mpiaij->B));
6715   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6716   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6717   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6718 
6719   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6720   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6721   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6722   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6723 
6724   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6725   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6726   a->free_a  = PETSC_TRUE;
6727   a->free_ij = PETSC_TRUE;
6728   b->free_a  = PETSC_TRUE;
6729   b->free_ij = PETSC_TRUE;
6730   a->maxnz   = a->nz;
6731   b->maxnz   = b->nz;
6732 
6733   /* conversion must happen AFTER multiply setup */
6734   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6735   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6736   PetscCall(VecDestroy(&mpiaij->lvec));
6737   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6738 
6739   // Put the COO struct in a container and then attach that to the matrix
6740   PetscCall(PetscMalloc1(1, &coo));
6741   coo->n       = coo_n;
6742   coo->sf      = sf2;
6743   coo->sendlen = nleaves;
6744   coo->recvlen = nroots;
6745   coo->Annz    = Annz;
6746   coo->Bnnz    = Bnnz;
6747   coo->Annz2   = Annz2;
6748   coo->Bnnz2   = Bnnz2;
6749   coo->Atot1   = Atot1;
6750   coo->Atot2   = Atot2;
6751   coo->Btot1   = Btot1;
6752   coo->Btot2   = Btot2;
6753   coo->Ajmap1  = Ajmap1;
6754   coo->Aperm1  = Aperm1;
6755   coo->Bjmap1  = Bjmap1;
6756   coo->Bperm1  = Bperm1;
6757   coo->Aimap2  = Aimap2;
6758   coo->Ajmap2  = Ajmap2;
6759   coo->Aperm2  = Aperm2;
6760   coo->Bimap2  = Bimap2;
6761   coo->Bjmap2  = Bjmap2;
6762   coo->Bperm2  = Bperm2;
6763   coo->Cperm1  = Cperm1;
6764   // Allocate in preallocation. If not used, it has zero cost on host
6765   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6766   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6767   PetscCall(PetscContainerSetPointer(container, coo));
6768   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6769   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6770   PetscCall(PetscContainerDestroy(&container));
6771   PetscFunctionReturn(PETSC_SUCCESS);
6772 }
6773 
6774 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6775 {
6776   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6777   Mat                  A = mpiaij->A, B = mpiaij->B;
6778   PetscScalar         *Aa, *Ba;
6779   PetscScalar         *sendbuf, *recvbuf;
6780   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6781   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6782   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6783   const PetscCount    *Cperm1;
6784   PetscContainer       container;
6785   MatCOOStruct_MPIAIJ *coo;
6786 
6787   PetscFunctionBegin;
6788   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6789   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6790   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6791   sendbuf = coo->sendbuf;
6792   recvbuf = coo->recvbuf;
6793   Ajmap1  = coo->Ajmap1;
6794   Ajmap2  = coo->Ajmap2;
6795   Aimap2  = coo->Aimap2;
6796   Bjmap1  = coo->Bjmap1;
6797   Bjmap2  = coo->Bjmap2;
6798   Bimap2  = coo->Bimap2;
6799   Aperm1  = coo->Aperm1;
6800   Aperm2  = coo->Aperm2;
6801   Bperm1  = coo->Bperm1;
6802   Bperm2  = coo->Bperm2;
6803   Cperm1  = coo->Cperm1;
6804 
6805   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6806   PetscCall(MatSeqAIJGetArray(B, &Ba));
6807 
6808   /* Pack entries to be sent to remote */
6809   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6810 
6811   /* Send remote entries to their owner and overlap the communication with local computation */
6812   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6813   /* Add local entries to A and B */
6814   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6815     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6816     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6817     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6818   }
6819   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6820     PetscScalar sum = 0.0;
6821     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6822     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6823   }
6824   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6825 
6826   /* Add received remote entries to A and B */
6827   for (PetscCount i = 0; i < coo->Annz2; i++) {
6828     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6829   }
6830   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6831     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6832   }
6833   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6834   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6835   PetscFunctionReturn(PETSC_SUCCESS);
6836 }
6837 
6838 /*MC
6839    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6840 
6841    Options Database Keys:
6842 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6843 
6844    Level: beginner
6845 
6846    Notes:
6847    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6848     in this case the values associated with the rows and columns one passes in are set to zero
6849     in the matrix
6850 
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6852     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6853 
6854 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6855 M*/
6856 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6857 {
6858   Mat_MPIAIJ *b;
6859   PetscMPIInt size;
6860 
6861   PetscFunctionBegin;
6862   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6863 
6864   PetscCall(PetscNew(&b));
6865   B->data       = (void *)b;
6866   B->ops[0]     = MatOps_Values;
6867   B->assembled  = PETSC_FALSE;
6868   B->insertmode = NOT_SET_VALUES;
6869   b->size       = size;
6870 
6871   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6872 
6873   /* build cache for off array entries formed */
6874   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6875 
6876   b->donotstash  = PETSC_FALSE;
6877   b->colmap      = NULL;
6878   b->garray      = NULL;
6879   b->roworiented = PETSC_TRUE;
6880 
6881   /* stuff used for matrix vector multiply */
6882   b->lvec  = NULL;
6883   b->Mvctx = NULL;
6884 
6885   /* stuff for MatGetRow() */
6886   b->rowindices   = NULL;
6887   b->rowvalues    = NULL;
6888   b->getrowactive = PETSC_FALSE;
6889 
6890   /* flexible pointer used in CUSPARSE classes */
6891   b->spptr = NULL;
6892 
6893   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6894   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6895   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6896   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6897   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6898   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
6900   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6901   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6903   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6904 #if defined(PETSC_HAVE_CUDA)
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6906 #endif
6907 #if defined(PETSC_HAVE_HIP)
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6909 #endif
6910 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6912 #endif
6913 #if defined(PETSC_HAVE_MKL_SPARSE)
6914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6915 #endif
6916   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6917   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6919   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6920 #if defined(PETSC_HAVE_ELEMENTAL)
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6922 #endif
6923 #if defined(PETSC_HAVE_SCALAPACK)
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6925 #endif
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6928 #if defined(PETSC_HAVE_HYPRE)
6929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6931 #endif
6932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6936   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6937   PetscFunctionReturn(PETSC_SUCCESS);
6938 }
6939 
6940 /*@
6941   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6942   and "off-diagonal" part of the matrix in CSR format.
6943 
6944   Collective
6945 
6946   Input Parameters:
6947 + comm - MPI communicator
6948 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6949 . n    - This value should be the same as the local size used in creating the
6950          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6951          calculated if `N` is given) For square matrices `n` is almost always `m`.
6952 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6953 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6954 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6955 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6956 . a    - matrix values
6957 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6958 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6959 - oa   - matrix values
6960 
6961   Output Parameter:
6962 . mat - the matrix
6963 
6964   Level: advanced
6965 
6966   Notes:
6967   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6968   must free the arrays once the matrix has been destroyed and not before.
6969 
6970   The `i` and `j` indices are 0 based
6971 
6972   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6973 
6974   This sets local rows and cannot be used to set off-processor values.
6975 
6976   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6977   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6978   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6979   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6980   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6981   communication if it is known that only local entries will be set.
6982 
6983 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6984           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6985 @*/
6986 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6987 {
6988   Mat_MPIAIJ *maij;
6989 
6990   PetscFunctionBegin;
6991   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6992   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6993   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6994   PetscCall(MatCreate(comm, mat));
6995   PetscCall(MatSetSizes(*mat, m, n, M, N));
6996   PetscCall(MatSetType(*mat, MATMPIAIJ));
6997   maij = (Mat_MPIAIJ *)(*mat)->data;
6998 
6999   (*mat)->preallocated = PETSC_TRUE;
7000 
7001   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7002   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7003 
7004   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7005   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7006 
7007   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7008   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7009   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7010   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7011   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7012   PetscFunctionReturn(PETSC_SUCCESS);
7013 }
7014 
7015 typedef struct {
7016   Mat       *mp;    /* intermediate products */
7017   PetscBool *mptmp; /* is the intermediate product temporary ? */
7018   PetscInt   cp;    /* number of intermediate products */
7019 
7020   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7021   PetscInt    *startsj_s, *startsj_r;
7022   PetscScalar *bufa;
7023   Mat          P_oth;
7024 
7025   /* may take advantage of merging product->B */
7026   Mat Bloc; /* B-local by merging diag and off-diag */
7027 
7028   /* cusparse does not have support to split between symbolic and numeric phases.
7029      When api_user is true, we don't need to update the numerical values
7030      of the temporary storage */
7031   PetscBool reusesym;
7032 
7033   /* support for COO values insertion */
7034   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7035   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7036   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7037   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7038   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7039   PetscMemType mtype;
7040 
7041   /* customization */
7042   PetscBool abmerge;
7043   PetscBool P_oth_bind;
7044 } MatMatMPIAIJBACKEND;
7045 
7046 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7047 {
7048   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7049   PetscInt             i;
7050 
7051   PetscFunctionBegin;
7052   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7053   PetscCall(PetscFree(mmdata->bufa));
7054   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7055   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7056   PetscCall(MatDestroy(&mmdata->P_oth));
7057   PetscCall(MatDestroy(&mmdata->Bloc));
7058   PetscCall(PetscSFDestroy(&mmdata->sf));
7059   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7060   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7061   PetscCall(PetscFree(mmdata->own[0]));
7062   PetscCall(PetscFree(mmdata->own));
7063   PetscCall(PetscFree(mmdata->off[0]));
7064   PetscCall(PetscFree(mmdata->off));
7065   PetscCall(PetscFree(mmdata));
7066   PetscFunctionReturn(PETSC_SUCCESS);
7067 }
7068 
7069 /* Copy selected n entries with indices in idx[] of A to v[].
7070    If idx is NULL, copy the whole data array of A to v[]
7071  */
7072 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7073 {
7074   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7075 
7076   PetscFunctionBegin;
7077   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7078   if (f) {
7079     PetscCall((*f)(A, n, idx, v));
7080   } else {
7081     const PetscScalar *vv;
7082 
7083     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7084     if (n && idx) {
7085       PetscScalar    *w  = v;
7086       const PetscInt *oi = idx;
7087       PetscInt        j;
7088 
7089       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7090     } else {
7091       PetscCall(PetscArraycpy(v, vv, n));
7092     }
7093     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7094   }
7095   PetscFunctionReturn(PETSC_SUCCESS);
7096 }
7097 
7098 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7099 {
7100   MatMatMPIAIJBACKEND *mmdata;
7101   PetscInt             i, n_d, n_o;
7102 
7103   PetscFunctionBegin;
7104   MatCheckProduct(C, 1);
7105   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7106   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7107   if (!mmdata->reusesym) { /* update temporary matrices */
7108     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7109     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7110   }
7111   mmdata->reusesym = PETSC_FALSE;
7112 
7113   for (i = 0; i < mmdata->cp; i++) {
7114     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7115     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7116   }
7117   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7118     PetscInt noff;
7119 
7120     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7121     if (mmdata->mptmp[i]) continue;
7122     if (noff) {
7123       PetscInt nown;
7124 
7125       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7126       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7127       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7128       n_o += noff;
7129       n_d += nown;
7130     } else {
7131       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7132 
7133       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7134       n_d += mm->nz;
7135     }
7136   }
7137   if (mmdata->hasoffproc) { /* offprocess insertion */
7138     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7139     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7140   }
7141   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7142   PetscFunctionReturn(PETSC_SUCCESS);
7143 }
7144 
7145 /* Support for Pt * A, A * P, or Pt * A * P */
7146 #define MAX_NUMBER_INTERMEDIATE 4
7147 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7148 {
7149   Mat_Product           *product = C->product;
7150   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7151   Mat_MPIAIJ            *a, *p;
7152   MatMatMPIAIJBACKEND   *mmdata;
7153   ISLocalToGlobalMapping P_oth_l2g = NULL;
7154   IS                     glob      = NULL;
7155   const char            *prefix;
7156   char                   pprefix[256];
7157   const PetscInt        *globidx, *P_oth_idx;
7158   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7159   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7160   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7161                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7162                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7163   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7164 
7165   MatProductType ptype;
7166   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7167   PetscMPIInt    size;
7168 
7169   PetscFunctionBegin;
7170   MatCheckProduct(C, 1);
7171   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7172   ptype = product->type;
7173   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7174     ptype                                          = MATPRODUCT_AB;
7175     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7176   }
7177   switch (ptype) {
7178   case MATPRODUCT_AB:
7179     A          = product->A;
7180     P          = product->B;
7181     m          = A->rmap->n;
7182     n          = P->cmap->n;
7183     M          = A->rmap->N;
7184     N          = P->cmap->N;
7185     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7186     break;
7187   case MATPRODUCT_AtB:
7188     P          = product->A;
7189     A          = product->B;
7190     m          = P->cmap->n;
7191     n          = A->cmap->n;
7192     M          = P->cmap->N;
7193     N          = A->cmap->N;
7194     hasoffproc = PETSC_TRUE;
7195     break;
7196   case MATPRODUCT_PtAP:
7197     A          = product->A;
7198     P          = product->B;
7199     m          = P->cmap->n;
7200     n          = P->cmap->n;
7201     M          = P->cmap->N;
7202     N          = P->cmap->N;
7203     hasoffproc = PETSC_TRUE;
7204     break;
7205   default:
7206     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7207   }
7208   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7209   if (size == 1) hasoffproc = PETSC_FALSE;
7210 
7211   /* defaults */
7212   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7213     mp[i]    = NULL;
7214     mptmp[i] = PETSC_FALSE;
7215     rmapt[i] = -1;
7216     cmapt[i] = -1;
7217     rmapa[i] = NULL;
7218     cmapa[i] = NULL;
7219   }
7220 
7221   /* customization */
7222   PetscCall(PetscNew(&mmdata));
7223   mmdata->reusesym = product->api_user;
7224   if (ptype == MATPRODUCT_AB) {
7225     if (product->api_user) {
7226       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7227       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7228       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7229       PetscOptionsEnd();
7230     } else {
7231       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7232       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7233       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7234       PetscOptionsEnd();
7235     }
7236   } else if (ptype == MATPRODUCT_PtAP) {
7237     if (product->api_user) {
7238       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7239       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7240       PetscOptionsEnd();
7241     } else {
7242       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7243       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7244       PetscOptionsEnd();
7245     }
7246   }
7247   a = (Mat_MPIAIJ *)A->data;
7248   p = (Mat_MPIAIJ *)P->data;
7249   PetscCall(MatSetSizes(C, m, n, M, N));
7250   PetscCall(PetscLayoutSetUp(C->rmap));
7251   PetscCall(PetscLayoutSetUp(C->cmap));
7252   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7253   PetscCall(MatGetOptionsPrefix(C, &prefix));
7254 
7255   cp = 0;
7256   switch (ptype) {
7257   case MATPRODUCT_AB: /* A * P */
7258     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7259 
7260     /* A_diag * P_local (merged or not) */
7261     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7262       /* P is product->B */
7263       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7264       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7265       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7266       PetscCall(MatProductSetFill(mp[cp], product->fill));
7267       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7268       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7269       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7270       mp[cp]->product->api_user = product->api_user;
7271       PetscCall(MatProductSetFromOptions(mp[cp]));
7272       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7273       PetscCall(ISGetIndices(glob, &globidx));
7274       rmapt[cp] = 1;
7275       cmapt[cp] = 2;
7276       cmapa[cp] = globidx;
7277       mptmp[cp] = PETSC_FALSE;
7278       cp++;
7279     } else { /* A_diag * P_diag and A_diag * P_off */
7280       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7281       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7282       PetscCall(MatProductSetFill(mp[cp], product->fill));
7283       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7284       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7285       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7286       mp[cp]->product->api_user = product->api_user;
7287       PetscCall(MatProductSetFromOptions(mp[cp]));
7288       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7289       rmapt[cp] = 1;
7290       cmapt[cp] = 1;
7291       mptmp[cp] = PETSC_FALSE;
7292       cp++;
7293       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7294       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7295       PetscCall(MatProductSetFill(mp[cp], product->fill));
7296       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7297       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7298       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7299       mp[cp]->product->api_user = product->api_user;
7300       PetscCall(MatProductSetFromOptions(mp[cp]));
7301       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7302       rmapt[cp] = 1;
7303       cmapt[cp] = 2;
7304       cmapa[cp] = p->garray;
7305       mptmp[cp] = PETSC_FALSE;
7306       cp++;
7307     }
7308 
7309     /* A_off * P_other */
7310     if (mmdata->P_oth) {
7311       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7312       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7313       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7314       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7315       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7316       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7317       PetscCall(MatProductSetFill(mp[cp], product->fill));
7318       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7319       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7320       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7321       mp[cp]->product->api_user = product->api_user;
7322       PetscCall(MatProductSetFromOptions(mp[cp]));
7323       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7324       rmapt[cp] = 1;
7325       cmapt[cp] = 2;
7326       cmapa[cp] = P_oth_idx;
7327       mptmp[cp] = PETSC_FALSE;
7328       cp++;
7329     }
7330     break;
7331 
7332   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7333     /* A is product->B */
7334     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7335     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7336       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7337       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7338       PetscCall(MatProductSetFill(mp[cp], product->fill));
7339       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7340       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7341       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7342       mp[cp]->product->api_user = product->api_user;
7343       PetscCall(MatProductSetFromOptions(mp[cp]));
7344       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7345       PetscCall(ISGetIndices(glob, &globidx));
7346       rmapt[cp] = 2;
7347       rmapa[cp] = globidx;
7348       cmapt[cp] = 2;
7349       cmapa[cp] = globidx;
7350       mptmp[cp] = PETSC_FALSE;
7351       cp++;
7352     } else {
7353       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7354       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7355       PetscCall(MatProductSetFill(mp[cp], product->fill));
7356       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7357       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7358       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7359       mp[cp]->product->api_user = product->api_user;
7360       PetscCall(MatProductSetFromOptions(mp[cp]));
7361       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7362       PetscCall(ISGetIndices(glob, &globidx));
7363       rmapt[cp] = 1;
7364       cmapt[cp] = 2;
7365       cmapa[cp] = globidx;
7366       mptmp[cp] = PETSC_FALSE;
7367       cp++;
7368       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7369       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7370       PetscCall(MatProductSetFill(mp[cp], product->fill));
7371       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7372       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7373       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7374       mp[cp]->product->api_user = product->api_user;
7375       PetscCall(MatProductSetFromOptions(mp[cp]));
7376       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7377       rmapt[cp] = 2;
7378       rmapa[cp] = p->garray;
7379       cmapt[cp] = 2;
7380       cmapa[cp] = globidx;
7381       mptmp[cp] = PETSC_FALSE;
7382       cp++;
7383     }
7384     break;
7385   case MATPRODUCT_PtAP:
7386     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7387     /* P is product->B */
7388     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7389     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7390     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7391     PetscCall(MatProductSetFill(mp[cp], product->fill));
7392     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7393     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7394     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7395     mp[cp]->product->api_user = product->api_user;
7396     PetscCall(MatProductSetFromOptions(mp[cp]));
7397     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7398     PetscCall(ISGetIndices(glob, &globidx));
7399     rmapt[cp] = 2;
7400     rmapa[cp] = globidx;
7401     cmapt[cp] = 2;
7402     cmapa[cp] = globidx;
7403     mptmp[cp] = PETSC_FALSE;
7404     cp++;
7405     if (mmdata->P_oth) {
7406       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7407       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7408       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7409       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7410       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7411       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7412       PetscCall(MatProductSetFill(mp[cp], product->fill));
7413       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7414       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7415       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7416       mp[cp]->product->api_user = product->api_user;
7417       PetscCall(MatProductSetFromOptions(mp[cp]));
7418       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7419       mptmp[cp] = PETSC_TRUE;
7420       cp++;
7421       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7422       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7423       PetscCall(MatProductSetFill(mp[cp], product->fill));
7424       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7425       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7426       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7427       mp[cp]->product->api_user = product->api_user;
7428       PetscCall(MatProductSetFromOptions(mp[cp]));
7429       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7430       rmapt[cp] = 2;
7431       rmapa[cp] = globidx;
7432       cmapt[cp] = 2;
7433       cmapa[cp] = P_oth_idx;
7434       mptmp[cp] = PETSC_FALSE;
7435       cp++;
7436     }
7437     break;
7438   default:
7439     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7440   }
7441   /* sanity check */
7442   if (size > 1)
7443     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7444 
7445   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7446   for (i = 0; i < cp; i++) {
7447     mmdata->mp[i]    = mp[i];
7448     mmdata->mptmp[i] = mptmp[i];
7449   }
7450   mmdata->cp             = cp;
7451   C->product->data       = mmdata;
7452   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7453   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7454 
7455   /* memory type */
7456   mmdata->mtype = PETSC_MEMTYPE_HOST;
7457   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7458   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7459   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7460   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7461   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7462   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7463 
7464   /* prepare coo coordinates for values insertion */
7465 
7466   /* count total nonzeros of those intermediate seqaij Mats
7467     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7468     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7469     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7470   */
7471   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7472     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7473     if (mptmp[cp]) continue;
7474     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7475       const PetscInt *rmap = rmapa[cp];
7476       const PetscInt  mr   = mp[cp]->rmap->n;
7477       const PetscInt  rs   = C->rmap->rstart;
7478       const PetscInt  re   = C->rmap->rend;
7479       const PetscInt *ii   = mm->i;
7480       for (i = 0; i < mr; i++) {
7481         const PetscInt gr = rmap[i];
7482         const PetscInt nz = ii[i + 1] - ii[i];
7483         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7484         else ncoo_oown += nz;                  /* this row is local */
7485       }
7486     } else ncoo_d += mm->nz;
7487   }
7488 
7489   /*
7490     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7491 
7492     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7493 
7494     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7495 
7496     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7497     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7498     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7499 
7500     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7501     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7502   */
7503   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7504   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7505 
7506   /* gather (i,j) of nonzeros inserted by remote procs */
7507   if (hasoffproc) {
7508     PetscSF  msf;
7509     PetscInt ncoo2, *coo_i2, *coo_j2;
7510 
7511     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7512     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7513     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7514 
7515     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7516       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7517       PetscInt   *idxoff = mmdata->off[cp];
7518       PetscInt   *idxown = mmdata->own[cp];
7519       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7520         const PetscInt *rmap = rmapa[cp];
7521         const PetscInt *cmap = cmapa[cp];
7522         const PetscInt *ii   = mm->i;
7523         PetscInt       *coi  = coo_i + ncoo_o;
7524         PetscInt       *coj  = coo_j + ncoo_o;
7525         const PetscInt  mr   = mp[cp]->rmap->n;
7526         const PetscInt  rs   = C->rmap->rstart;
7527         const PetscInt  re   = C->rmap->rend;
7528         const PetscInt  cs   = C->cmap->rstart;
7529         for (i = 0; i < mr; i++) {
7530           const PetscInt *jj = mm->j + ii[i];
7531           const PetscInt  gr = rmap[i];
7532           const PetscInt  nz = ii[i + 1] - ii[i];
7533           if (gr < rs || gr >= re) { /* this is an offproc row */
7534             for (j = ii[i]; j < ii[i + 1]; j++) {
7535               *coi++    = gr;
7536               *idxoff++ = j;
7537             }
7538             if (!cmapt[cp]) { /* already global */
7539               for (j = 0; j < nz; j++) *coj++ = jj[j];
7540             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7541               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7542             } else { /* offdiag */
7543               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7544             }
7545             ncoo_o += nz;
7546           } else { /* this is a local row */
7547             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7548           }
7549         }
7550       }
7551       mmdata->off[cp + 1] = idxoff;
7552       mmdata->own[cp + 1] = idxown;
7553     }
7554 
7555     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7556     PetscInt incoo_o;
7557     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7558     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7559     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7560     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7561     ncoo = ncoo_d + ncoo_oown + ncoo2;
7562     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7563     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7564     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7565     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7566     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7567     PetscCall(PetscFree2(coo_i, coo_j));
7568     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7569     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7570     coo_i = coo_i2;
7571     coo_j = coo_j2;
7572   } else { /* no offproc values insertion */
7573     ncoo = ncoo_d;
7574     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7575 
7576     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7577     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7578     PetscCall(PetscSFSetUp(mmdata->sf));
7579   }
7580   mmdata->hasoffproc = hasoffproc;
7581 
7582   /* gather (i,j) of nonzeros inserted locally */
7583   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7584     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7585     PetscInt       *coi  = coo_i + ncoo_d;
7586     PetscInt       *coj  = coo_j + ncoo_d;
7587     const PetscInt *jj   = mm->j;
7588     const PetscInt *ii   = mm->i;
7589     const PetscInt *cmap = cmapa[cp];
7590     const PetscInt *rmap = rmapa[cp];
7591     const PetscInt  mr   = mp[cp]->rmap->n;
7592     const PetscInt  rs   = C->rmap->rstart;
7593     const PetscInt  re   = C->rmap->rend;
7594     const PetscInt  cs   = C->cmap->rstart;
7595 
7596     if (mptmp[cp]) continue;
7597     if (rmapt[cp] == 1) { /* consecutive rows */
7598       /* fill coo_i */
7599       for (i = 0; i < mr; i++) {
7600         const PetscInt gr = i + rs;
7601         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7602       }
7603       /* fill coo_j */
7604       if (!cmapt[cp]) { /* type-0, already global */
7605         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7606       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7607         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7608       } else {                                            /* type-2, local to global for sparse columns */
7609         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7610       }
7611       ncoo_d += mm->nz;
7612     } else if (rmapt[cp] == 2) { /* sparse rows */
7613       for (i = 0; i < mr; i++) {
7614         const PetscInt *jj = mm->j + ii[i];
7615         const PetscInt  gr = rmap[i];
7616         const PetscInt  nz = ii[i + 1] - ii[i];
7617         if (gr >= rs && gr < re) { /* local rows */
7618           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7619           if (!cmapt[cp]) { /* type-0, already global */
7620             for (j = 0; j < nz; j++) *coj++ = jj[j];
7621           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7622             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7623           } else { /* type-2, local to global for sparse columns */
7624             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7625           }
7626           ncoo_d += nz;
7627         }
7628       }
7629     }
7630   }
7631   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7632   PetscCall(ISDestroy(&glob));
7633   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7634   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7635   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7636   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7637 
7638   /* set block sizes */
7639   A = product->A;
7640   P = product->B;
7641   switch (ptype) {
7642   case MATPRODUCT_PtAP:
7643     PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
7644     break;
7645   case MATPRODUCT_RARt:
7646     PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
7647     break;
7648   case MATPRODUCT_ABC:
7649     PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
7650     break;
7651   case MATPRODUCT_AB:
7652     PetscCall(MatSetBlockSizesFromMats(C, A, P));
7653     break;
7654   case MATPRODUCT_AtB:
7655     PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
7656     break;
7657   case MATPRODUCT_ABt:
7658     PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
7659     break;
7660   default:
7661     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
7662   }
7663 
7664   /* preallocate with COO data */
7665   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7666   PetscCall(PetscFree2(coo_i, coo_j));
7667   PetscFunctionReturn(PETSC_SUCCESS);
7668 }
7669 
7670 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7671 {
7672   Mat_Product *product = mat->product;
7673 #if defined(PETSC_HAVE_DEVICE)
7674   PetscBool match  = PETSC_FALSE;
7675   PetscBool usecpu = PETSC_FALSE;
7676 #else
7677   PetscBool match = PETSC_TRUE;
7678 #endif
7679 
7680   PetscFunctionBegin;
7681   MatCheckProduct(mat, 1);
7682 #if defined(PETSC_HAVE_DEVICE)
7683   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7684   if (match) { /* we can always fallback to the CPU if requested */
7685     switch (product->type) {
7686     case MATPRODUCT_AB:
7687       if (product->api_user) {
7688         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7689         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7690         PetscOptionsEnd();
7691       } else {
7692         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7693         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7694         PetscOptionsEnd();
7695       }
7696       break;
7697     case MATPRODUCT_AtB:
7698       if (product->api_user) {
7699         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7700         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7701         PetscOptionsEnd();
7702       } else {
7703         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7704         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7705         PetscOptionsEnd();
7706       }
7707       break;
7708     case MATPRODUCT_PtAP:
7709       if (product->api_user) {
7710         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7711         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7712         PetscOptionsEnd();
7713       } else {
7714         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7715         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7716         PetscOptionsEnd();
7717       }
7718       break;
7719     default:
7720       break;
7721     }
7722     match = (PetscBool)!usecpu;
7723   }
7724 #endif
7725   if (match) {
7726     switch (product->type) {
7727     case MATPRODUCT_AB:
7728     case MATPRODUCT_AtB:
7729     case MATPRODUCT_PtAP:
7730       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7731       break;
7732     default:
7733       break;
7734     }
7735   }
7736   /* fallback to MPIAIJ ops */
7737   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7738   PetscFunctionReturn(PETSC_SUCCESS);
7739 }
7740 
7741 /*
7742    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7743 
7744    n - the number of block indices in cc[]
7745    cc - the block indices (must be large enough to contain the indices)
7746 */
7747 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7748 {
7749   PetscInt        cnt = -1, nidx, j;
7750   const PetscInt *idx;
7751 
7752   PetscFunctionBegin;
7753   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7754   if (nidx) {
7755     cnt     = 0;
7756     cc[cnt] = idx[0] / bs;
7757     for (j = 1; j < nidx; j++) {
7758       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7759     }
7760   }
7761   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7762   *n = cnt + 1;
7763   PetscFunctionReturn(PETSC_SUCCESS);
7764 }
7765 
7766 /*
7767     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7768 
7769     ncollapsed - the number of block indices
7770     collapsed - the block indices (must be large enough to contain the indices)
7771 */
7772 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7773 {
7774   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7775 
7776   PetscFunctionBegin;
7777   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7778   for (i = start + 1; i < start + bs; i++) {
7779     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7780     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7781     cprevtmp = cprev;
7782     cprev    = merged;
7783     merged   = cprevtmp;
7784   }
7785   *ncollapsed = nprev;
7786   if (collapsed) *collapsed = cprev;
7787   PetscFunctionReturn(PETSC_SUCCESS);
7788 }
7789 
7790 /*
7791  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7792 
7793  Input Parameter:
7794  . Amat - matrix
7795  - symmetrize - make the result symmetric
7796  + scale - scale with diagonal
7797 
7798  Output Parameter:
7799  . a_Gmat - output scalar graph >= 0
7800 
7801 */
7802 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7803 {
7804   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7805   MPI_Comm  comm;
7806   Mat       Gmat;
7807   PetscBool ismpiaij, isseqaij;
7808   Mat       a, b, c;
7809   MatType   jtype;
7810 
7811   PetscFunctionBegin;
7812   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7813   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7814   PetscCall(MatGetSize(Amat, &MM, &NN));
7815   PetscCall(MatGetBlockSize(Amat, &bs));
7816   nloc = (Iend - Istart) / bs;
7817 
7818   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7819   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7820   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7821 
7822   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7823   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7824      implementation */
7825   if (bs > 1) {
7826     PetscCall(MatGetType(Amat, &jtype));
7827     PetscCall(MatCreate(comm, &Gmat));
7828     PetscCall(MatSetType(Gmat, jtype));
7829     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7830     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7831     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7832       PetscInt  *d_nnz, *o_nnz;
7833       MatScalar *aa, val, *AA;
7834       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7835 
7836       if (isseqaij) {
7837         a = Amat;
7838         b = NULL;
7839       } else {
7840         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7841         a             = d->A;
7842         b             = d->B;
7843       }
7844       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7845       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7846       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7847         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7848         const PetscInt *cols1, *cols2;
7849 
7850         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7851           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7852           nnz[brow / bs] = nc2 / bs;
7853           if (nc2 % bs) ok = 0;
7854           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7855           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7856             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7857             if (nc1 != nc2) ok = 0;
7858             else {
7859               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7860                 if (cols1[jj] != cols2[jj]) ok = 0;
7861                 if (cols1[jj] % bs != jj % bs) ok = 0;
7862               }
7863             }
7864             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7865           }
7866           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7867           if (!ok) {
7868             PetscCall(PetscFree2(d_nnz, o_nnz));
7869             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7870             goto old_bs;
7871           }
7872         }
7873       }
7874       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7875       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7876       PetscCall(PetscFree2(d_nnz, o_nnz));
7877       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7878       // diag
7879       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7880         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7881 
7882         ai = aseq->i;
7883         n  = ai[brow + 1] - ai[brow];
7884         aj = aseq->j + ai[brow];
7885         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7886           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7887           val        = 0;
7888           if (index_size == 0) {
7889             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7890               aa = aseq->a + ai[brow + ii] + k;
7891               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7892                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7893               }
7894             }
7895           } else {                                            // use (index,index) value if provided
7896             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7897               PetscInt ii = index[iii];
7898               aa          = aseq->a + ai[brow + ii] + k;
7899               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7900                 PetscInt jj = index[jjj];
7901                 val += PetscAbs(PetscRealPart(aa[jj]));
7902               }
7903             }
7904           }
7905           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7906           AA[k / bs] = val;
7907         }
7908         grow = Istart / bs + brow / bs;
7909         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7910       }
7911       // off-diag
7912       if (ismpiaij) {
7913         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7914         const PetscScalar *vals;
7915         const PetscInt    *cols, *garray = aij->garray;
7916 
7917         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7918         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7919           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7920           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7921             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7922             AA[k / bs] = 0;
7923             AJ[cidx]   = garray[cols[k]] / bs;
7924           }
7925           nc = ncols / bs;
7926           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7927           if (index_size == 0) {
7928             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7929               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7930               for (PetscInt k = 0; k < ncols; k += bs) {
7931                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7932                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7933                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7934                 }
7935               }
7936               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7937             }
7938           } else {                                            // use (index,index) value if provided
7939             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7940               PetscInt ii = index[iii];
7941               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7942               for (PetscInt k = 0; k < ncols; k += bs) {
7943                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7944                   PetscInt jj = index[jjj];
7945                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7946                 }
7947               }
7948               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7949             }
7950           }
7951           grow = Istart / bs + brow / bs;
7952           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7953         }
7954       }
7955       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7956       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7957       PetscCall(PetscFree2(AA, AJ));
7958     } else {
7959       const PetscScalar *vals;
7960       const PetscInt    *idx;
7961       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7962     old_bs:
7963       /*
7964        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7965        */
7966       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7967       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7968       if (isseqaij) {
7969         PetscInt max_d_nnz;
7970 
7971         /*
7972          Determine exact preallocation count for (sequential) scalar matrix
7973          */
7974         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7975         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7976         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7977         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7978         PetscCall(PetscFree3(w0, w1, w2));
7979       } else if (ismpiaij) {
7980         Mat             Daij, Oaij;
7981         const PetscInt *garray;
7982         PetscInt        max_d_nnz;
7983 
7984         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7985         /*
7986          Determine exact preallocation count for diagonal block portion of scalar matrix
7987          */
7988         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7989         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7990         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7991         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7992         PetscCall(PetscFree3(w0, w1, w2));
7993         /*
7994          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7995          */
7996         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7997           o_nnz[jj] = 0;
7998           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7999             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
8000             o_nnz[jj] += ncols;
8001             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
8002           }
8003           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
8004         }
8005       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
8006       /* get scalar copy (norms) of matrix */
8007       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
8008       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
8009       PetscCall(PetscFree2(d_nnz, o_nnz));
8010       for (Ii = Istart; Ii < Iend; Ii++) {
8011         PetscInt dest_row = Ii / bs;
8012 
8013         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
8014         for (jj = 0; jj < ncols; jj++) {
8015           PetscInt    dest_col = idx[jj] / bs;
8016           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
8017 
8018           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
8019         }
8020         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
8021       }
8022       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8023       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8024     }
8025   } else {
8026     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8027     else {
8028       Gmat = Amat;
8029       PetscCall(PetscObjectReference((PetscObject)Gmat));
8030     }
8031     if (isseqaij) {
8032       a = Gmat;
8033       b = NULL;
8034     } else {
8035       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8036       a             = d->A;
8037       b             = d->B;
8038     }
8039     if (filter >= 0 || scale) {
8040       /* take absolute value of each entry */
8041       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8042         MatInfo      info;
8043         PetscScalar *avals;
8044 
8045         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8046         PetscCall(MatSeqAIJGetArray(c, &avals));
8047         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8048         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8049       }
8050     }
8051   }
8052   if (symmetrize) {
8053     PetscBool isset, issym;
8054 
8055     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8056     if (!isset || !issym) {
8057       Mat matTrans;
8058 
8059       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8060       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8061       PetscCall(MatDestroy(&matTrans));
8062     }
8063     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8064   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8065   if (scale) {
8066     /* scale c for all diagonal values = 1 or -1 */
8067     Vec diag;
8068 
8069     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8070     PetscCall(MatGetDiagonal(Gmat, diag));
8071     PetscCall(VecReciprocal(diag));
8072     PetscCall(VecSqrtAbs(diag));
8073     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8074     PetscCall(VecDestroy(&diag));
8075   }
8076   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8077   if (filter >= 0) {
8078     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8079     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8080   }
8081   *a_Gmat = Gmat;
8082   PetscFunctionReturn(PETSC_SUCCESS);
8083 }
8084 
8085 /*
8086     Special version for direct calls from Fortran
8087 */
8088 
8089 /* Change these macros so can be used in void function */
8090 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8091 #undef PetscCall
8092 #define PetscCall(...) \
8093   do { \
8094     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8095     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8096       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8097       return; \
8098     } \
8099   } while (0)
8100 
8101 #undef SETERRQ
8102 #define SETERRQ(comm, ierr, ...) \
8103   do { \
8104     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8105     return; \
8106   } while (0)
8107 
8108 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8109   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8110 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8111   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8112 #else
8113 #endif
8114 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8115 {
8116   Mat         mat = *mmat;
8117   PetscInt    m = *mm, n = *mn;
8118   InsertMode  addv = *maddv;
8119   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8120   PetscScalar value;
8121 
8122   MatCheckPreallocated(mat, 1);
8123   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8124   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8125   {
8126     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8127     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8128     PetscBool roworiented = aij->roworiented;
8129 
8130     /* Some Variables required in the macro */
8131     Mat         A     = aij->A;
8132     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8133     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8134     MatScalar  *aa;
8135     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8136     Mat         B                 = aij->B;
8137     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8138     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8139     MatScalar  *ba;
8140     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8141      * cannot use "#if defined" inside a macro. */
8142     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8143 
8144     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8145     PetscInt   nonew = a->nonew;
8146     MatScalar *ap1, *ap2;
8147 
8148     PetscFunctionBegin;
8149     PetscCall(MatSeqAIJGetArray(A, &aa));
8150     PetscCall(MatSeqAIJGetArray(B, &ba));
8151     for (i = 0; i < m; i++) {
8152       if (im[i] < 0) continue;
8153       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8154       if (im[i] >= rstart && im[i] < rend) {
8155         row      = im[i] - rstart;
8156         lastcol1 = -1;
8157         rp1      = aj + ai[row];
8158         ap1      = aa + ai[row];
8159         rmax1    = aimax[row];
8160         nrow1    = ailen[row];
8161         low1     = 0;
8162         high1    = nrow1;
8163         lastcol2 = -1;
8164         rp2      = bj + bi[row];
8165         ap2      = ba + bi[row];
8166         rmax2    = bimax[row];
8167         nrow2    = bilen[row];
8168         low2     = 0;
8169         high2    = nrow2;
8170 
8171         for (j = 0; j < n; j++) {
8172           if (roworiented) value = v[i * n + j];
8173           else value = v[i + j * m];
8174           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8175           if (in[j] >= cstart && in[j] < cend) {
8176             col = in[j] - cstart;
8177             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8178           } else if (in[j] < 0) continue;
8179           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8180             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8181           } else {
8182             if (mat->was_assembled) {
8183               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8184 #if defined(PETSC_USE_CTABLE)
8185               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8186               col--;
8187 #else
8188               col = aij->colmap[in[j]] - 1;
8189 #endif
8190               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8191                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8192                 col = in[j];
8193                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8194                 B        = aij->B;
8195                 b        = (Mat_SeqAIJ *)B->data;
8196                 bimax    = b->imax;
8197                 bi       = b->i;
8198                 bilen    = b->ilen;
8199                 bj       = b->j;
8200                 rp2      = bj + bi[row];
8201                 ap2      = ba + bi[row];
8202                 rmax2    = bimax[row];
8203                 nrow2    = bilen[row];
8204                 low2     = 0;
8205                 high2    = nrow2;
8206                 bm       = aij->B->rmap->n;
8207                 ba       = b->a;
8208                 inserted = PETSC_FALSE;
8209               }
8210             } else col = in[j];
8211             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8212           }
8213         }
8214       } else if (!aij->donotstash) {
8215         if (roworiented) {
8216           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8217         } else {
8218           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8219         }
8220       }
8221     }
8222     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8223     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8224   }
8225   PetscFunctionReturnVoid();
8226 }
8227 
8228 /* Undefining these here since they were redefined from their original definition above! No
8229  * other PETSc functions should be defined past this point, as it is impossible to recover the
8230  * original definitions */
8231 #undef PetscCall
8232 #undef SETERRQ
8233