xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7f296bb328fcd4c99f2da7bfe8ba7ed8a4ebceee)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because those are what are used to determine
43     the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
137    This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140   for communicators controlling multiple processes.  It is recommended that you call both of
141   the above preallocation routines for simplicity.
142 
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
146   Developer Note:
147   Level: beginner
148 
149     Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when
150    enough exist.
151 
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
153 M*/
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity.
163 
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204 {
205   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
206   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
207   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
208   const PetscInt  *ia, *ib;
209   const MatScalar *aa, *bb, *aav, *bav;
210   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
211   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
212 
213   PetscFunctionBegin;
214   *keptrows = NULL;
215 
216   ia = a->i;
217   ib = b->i;
218   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
219   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
220   for (i = 0; i < m; i++) {
221     na = ia[i + 1] - ia[i];
222     nb = ib[i + 1] - ib[i];
223     if (!na && !nb) {
224       cnt++;
225       goto ok1;
226     }
227     aa = aav + ia[i];
228     for (j = 0; j < na; j++) {
229       if (aa[j] != 0.0) goto ok1;
230     }
231     bb = PetscSafePointerPlusOffset(bav, ib[i]);
232     for (j = 0; j < nb; j++) {
233       if (bb[j] != 0.0) goto ok1;
234     }
235     cnt++;
236   ok1:;
237   }
238   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239   if (!n0rows) {
240     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
241     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
242     PetscFunctionReturn(PETSC_SUCCESS);
243   }
244   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
245   cnt = 0;
246   for (i = 0; i < m; i++) {
247     na = ia[i + 1] - ia[i];
248     nb = ib[i + 1] - ib[i];
249     if (!na && !nb) continue;
250     aa = aav + ia[i];
251     for (j = 0; j < na; j++) {
252       if (aa[j] != 0.0) {
253         rows[cnt++] = rstart + i;
254         goto ok2;
255       }
256     }
257     bb = PetscSafePointerPlusOffset(bav, ib[i]);
258     for (j = 0; j < nb; j++) {
259       if (bb[j] != 0.0) {
260         rows[cnt++] = rstart + i;
261         goto ok2;
262       }
263     }
264   ok2:;
265   }
266   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
267   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
268   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302 {
303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304   PetscInt           i, m, n, *garray = aij->garray;
305   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
306   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
307   PetscReal         *work;
308   const PetscScalar *dummy;
309 
310   PetscFunctionBegin;
311   PetscCall(MatGetSize(A, &m, &n));
312   PetscCall(PetscCalloc1(n, &work));
313   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
314   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
315   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
316   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
317   if (type == NORM_2) {
318     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
319     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
320   } else if (type == NORM_1) {
321     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
322     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
323   } else if (type == NORM_INFINITY) {
324     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
325     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
327     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
328     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
329   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
330     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
331     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
332   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
333   if (type == NORM_INFINITY) {
334     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
335   } else {
336     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
337   }
338   PetscCall(PetscFree(work));
339   if (type == NORM_2) {
340     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
341   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
342     for (i = 0; i < n; i++) reductions[i] /= m;
343   }
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
346 
347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
348 {
349   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
350   IS              sis, gis;
351   const PetscInt *isis, *igis;
352   PetscInt        n, *iis, nsis, ngis, rstart, i;
353 
354   PetscFunctionBegin;
355   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
356   PetscCall(MatFindNonzeroRows(a->B, &gis));
357   PetscCall(ISGetSize(gis, &ngis));
358   PetscCall(ISGetSize(sis, &nsis));
359   PetscCall(ISGetIndices(sis, &isis));
360   PetscCall(ISGetIndices(gis, &igis));
361 
362   PetscCall(PetscMalloc1(ngis + nsis, &iis));
363   PetscCall(PetscArraycpy(iis, igis, ngis));
364   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
365   n = ngis + nsis;
366   PetscCall(PetscSortRemoveDupsInt(&n, iis));
367   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
368   for (i = 0; i < n; i++) iis[i] += rstart;
369   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
370 
371   PetscCall(ISRestoreIndices(sis, &isis));
372   PetscCall(ISRestoreIndices(gis, &igis));
373   PetscCall(ISDestroy(&sis));
374   PetscCall(ISDestroy(&gis));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*
379   Local utility routine that creates a mapping from the global column
380 number to the local number in the off-diagonal part of the local
381 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
382 a slightly higher hash table cost; without it it is not scalable (each processor
383 has an order N integer array but is fast to access.
384 */
385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
386 {
387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
388   PetscInt    n   = aij->B->cmap->n, i;
389 
390   PetscFunctionBegin;
391   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
392 #if defined(PETSC_USE_CTABLE)
393   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
394   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
395 #else
396   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
397   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
398 #endif
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
401 
402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
403   do { \
404     if (col <= lastcol1) low1 = 0; \
405     else high1 = nrow1; \
406     lastcol1 = col; \
407     while (high1 - low1 > 5) { \
408       t = (low1 + high1) / 2; \
409       if (rp1[t] > col) high1 = t; \
410       else low1 = t; \
411     } \
412     for (_i = low1; _i < high1; _i++) { \
413       if (rp1[_i] > col) break; \
414       if (rp1[_i] == col) { \
415         if (addv == ADD_VALUES) { \
416           ap1[_i] += value; \
417           /* Not sure LogFlops will slow dow the code or not */ \
418           (void)PetscLogFlops(1.0); \
419         } else ap1[_i] = value; \
420         goto a_noinsert; \
421       } \
422     } \
423     if (value == 0.0 && ignorezeroentries && row != col) { \
424       low1  = 0; \
425       high1 = nrow1; \
426       goto a_noinsert; \
427     } \
428     if (nonew == 1) { \
429       low1  = 0; \
430       high1 = nrow1; \
431       goto a_noinsert; \
432     } \
433     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
434     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
435     N = nrow1++ - 1; \
436     a->nz++; \
437     high1++; \
438     /* shift up all the later entries in this row */ \
439     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
440     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
441     rp1[_i] = col; \
442     ap1[_i] = value; \
443   a_noinsert:; \
444     ailen[row] = nrow1; \
445   } while (0)
446 
447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
448   do { \
449     if (col <= lastcol2) low2 = 0; \
450     else high2 = nrow2; \
451     lastcol2 = col; \
452     while (high2 - low2 > 5) { \
453       t = (low2 + high2) / 2; \
454       if (rp2[t] > col) high2 = t; \
455       else low2 = t; \
456     } \
457     for (_i = low2; _i < high2; _i++) { \
458       if (rp2[_i] > col) break; \
459       if (rp2[_i] == col) { \
460         if (addv == ADD_VALUES) { \
461           ap2[_i] += value; \
462           (void)PetscLogFlops(1.0); \
463         } else ap2[_i] = value; \
464         goto b_noinsert; \
465       } \
466     } \
467     if (value == 0.0 && ignorezeroentries) { \
468       low2  = 0; \
469       high2 = nrow2; \
470       goto b_noinsert; \
471     } \
472     if (nonew == 1) { \
473       low2  = 0; \
474       high2 = nrow2; \
475       goto b_noinsert; \
476     } \
477     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
478     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
479     N = nrow2++ - 1; \
480     b->nz++; \
481     high2++; \
482     /* shift up all the later entries in this row */ \
483     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
484     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
485     rp2[_i] = col; \
486     ap2[_i] = value; \
487   b_noinsert:; \
488     bilen[row] = nrow2; \
489   } while (0)
490 
491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
492 {
493   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
494   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
495   PetscInt     l, *garray                         = mat->garray, diag;
496   PetscScalar *aa, *ba;
497 
498   PetscFunctionBegin;
499   /* code only works for square matrices A */
500 
501   /* find size of row to the left of the diagonal part */
502   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
503   row = row - diag;
504   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
505     if (garray[b->j[b->i[row] + l]] > diag) break;
506   }
507   if (l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512 
513   /* diagonal part */
514   if (a->i[row + 1] - a->i[row]) {
515     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
516     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
517     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
518   }
519 
520   /* right of diagonal part */
521   if (b->i[row + 1] - b->i[row] - l) {
522     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
523     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
524     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
525   }
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
530 {
531   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
532   PetscScalar value = 0.0;
533   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
534   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
535   PetscBool   roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat         A     = aij->A;
539   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
540   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
541   PetscBool   ignorezeroentries = a->ignorezeroentries;
542   Mat         B                 = aij->B;
543   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
544   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
545   MatScalar  *aa, *ba;
546   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
547   PetscInt    nonew;
548   MatScalar  *ap1, *ap2;
549 
550   PetscFunctionBegin;
551   PetscCall(MatSeqAIJGetArray(A, &aa));
552   PetscCall(MatSeqAIJGetArray(B, &ba));
553   for (i = 0; i < m; i++) {
554     if (im[i] < 0) continue;
555     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
560       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
567       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j = 0; j < n; j++) {
574         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
575         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
580         } else if (in[j] < 0) {
581           continue;
582         } else {
583           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
584           if (mat->was_assembled) {
585             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
586 #if defined(PETSC_USE_CTABLE)
587             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
593               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
594               col = in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ *)B->data;
598               bimax = b->imax;
599               bi    = b->i;
600               bilen = b->ilen;
601               bj    = b->j;
602               ba    = b->a;
603               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
604               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
612               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
613                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
614               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
615             }
616           } else col = in[j];
617           nonew = b->nonew;
618           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
619         }
620       }
621     } else {
622       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
623       if (!aij->donotstash) {
624         mat->assembled = PETSC_FALSE;
625         if (roworiented) {
626           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
627         } else {
628           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
629         }
630       }
631     }
632   }
633   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
634   PetscCall(MatSeqAIJRestoreArray(B, &ba));
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
637 
638 /*
639     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
640     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
641     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
642 */
643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
644 {
645   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
646   Mat         A      = aij->A; /* diagonal part of the matrix */
647   Mat         B      = aij->B; /* off-diagonal part of the matrix */
648   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
649   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
650   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
651   PetscInt   *ailen = a->ilen, *aj = a->j;
652   PetscInt   *bilen = b->ilen, *bj = b->j;
653   PetscInt    am          = aij->A->rmap->n, j;
654   PetscInt    diag_so_far = 0, dnz;
655   PetscInt    offd_so_far = 0, onz;
656 
657   PetscFunctionBegin;
658   /* Iterate over all rows of the matrix */
659   for (j = 0; j < am; j++) {
660     dnz = onz = 0;
661     /*  Iterate over all non-zero columns of the current row */
662     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
663       /* If column is in the diagonal */
664       if (mat_j[col] >= cstart && mat_j[col] < cend) {
665         aj[diag_so_far++] = mat_j[col] - cstart;
666         dnz++;
667       } else { /* off-diagonal entries */
668         bj[offd_so_far++] = mat_j[col];
669         onz++;
670       }
671     }
672     ailen[j] = dnz;
673     bilen[j] = onz;
674   }
675   PetscFunctionReturn(PETSC_SUCCESS);
676 }
677 
678 /*
679     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
680     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
681     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
682     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
683     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
684 */
685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
686 {
687   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
688   Mat          A    = aij->A; /* diagonal part of the matrix */
689   Mat          B    = aij->B; /* off-diagonal part of the matrix */
690   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
691   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
692   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
693   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
694   PetscInt    *ailen = a->ilen, *aj = a->j;
695   PetscInt    *bilen = b->ilen, *bj = b->j;
696   PetscInt     am          = aij->A->rmap->n, j;
697   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
698   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
699   PetscScalar *aa = a->a, *ba = b->a;
700 
701   PetscFunctionBegin;
702   /* Iterate over all rows of the matrix */
703   for (j = 0; j < am; j++) {
704     dnz_row = onz_row = 0;
705     rowstart_offd     = full_offd_i[j];
706     rowstart_diag     = full_diag_i[j];
707     /*  Iterate over all non-zero columns of the current row */
708     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
709       /* If column is in the diagonal */
710       if (mat_j[col] >= cstart && mat_j[col] < cend) {
711         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
712         aa[rowstart_diag + dnz_row] = mat_a[col];
713         dnz_row++;
714       } else { /* off-diagonal entries */
715         bj[rowstart_offd + onz_row] = mat_j[col];
716         ba[rowstart_offd + onz_row] = mat_a[col];
717         onz_row++;
718       }
719     }
720     ailen[j] = dnz_row;
721     bilen[j] = onz_row;
722   }
723   PetscFunctionReturn(PETSC_SUCCESS);
724 }
725 
726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
727 {
728   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
729   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
730   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
731 
732   PetscFunctionBegin;
733   for (i = 0; i < m; i++) {
734     if (idxm[i] < 0) continue; /* negative row */
735     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
736     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
737     row = idxm[i] - rstart;
738     for (j = 0; j < n; j++) {
739       if (idxn[j] < 0) continue; /* negative column */
740       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
741       if (idxn[j] >= cstart && idxn[j] < cend) {
742         col = idxn[j] - cstart;
743         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
744       } else {
745         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
746 #if defined(PETSC_USE_CTABLE)
747         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
748         col--;
749 #else
750         col = aij->colmap[idxn[j]] - 1;
751 #endif
752         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
753         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
754       }
755     }
756   }
757   PetscFunctionReturn(PETSC_SUCCESS);
758 }
759 
760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
761 {
762   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
763   PetscInt    nstash, reallocs;
764 
765   PetscFunctionBegin;
766   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
767 
768   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
769   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
770   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
771   PetscFunctionReturn(PETSC_SUCCESS);
772 }
773 
774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
775 {
776   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
777   PetscMPIInt  n;
778   PetscInt     i, j, rstart, ncols, flg;
779   PetscInt    *row, *col;
780   PetscBool    other_disassembled;
781   PetscScalar *val;
782 
783   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
784 
785   PetscFunctionBegin;
786   if (!aij->donotstash && !mat->nooffprocentries) {
787     while (1) {
788       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
789       if (!flg) break;
790 
791       for (i = 0; i < n;) {
792         /* Now identify the consecutive vals belonging to the same row */
793         for (j = i, rstart = row[j]; j < n; j++) {
794           if (row[j] != rstart) break;
795         }
796         if (j < n) ncols = j - i;
797         else ncols = n - i;
798         /* Now assemble all these values with a single function call */
799         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
800         i = j;
801       }
802     }
803     PetscCall(MatStashScatterEnd_Private(&mat->stash));
804   }
805 #if defined(PETSC_HAVE_DEVICE)
806   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
807   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
808   if (mat->boundtocpu) {
809     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
810     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
811   }
812 #endif
813   PetscCall(MatAssemblyBegin(aij->A, mode));
814   PetscCall(MatAssemblyEnd(aij->A, mode));
815 
816   /* determine if any processor has disassembled, if so we must
817      also disassemble ourself, in order that we may reassemble. */
818   /*
819      if nonzero structure of submatrix B cannot change then we know that
820      no processor disassembled thus we can skip this stuff
821   */
822   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
823     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
824     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
825       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
826     }
827   }
828   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
829   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
830 #if defined(PETSC_HAVE_DEVICE)
831   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
832 #endif
833   PetscCall(MatAssemblyBegin(aij->B, mode));
834   PetscCall(MatAssemblyEnd(aij->B, mode));
835 
836   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
837 
838   aij->rowvalues = NULL;
839 
840   PetscCall(VecDestroy(&aij->diag));
841 
842   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
843   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
844     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
845     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
846   }
847 #if defined(PETSC_HAVE_DEVICE)
848   mat->offloadmask = PETSC_OFFLOAD_BOTH;
849 #endif
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
854 {
855   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
856 
857   PetscFunctionBegin;
858   PetscCall(MatZeroEntries(l->A));
859   PetscCall(MatZeroEntries(l->B));
860   PetscFunctionReturn(PETSC_SUCCESS);
861 }
862 
863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
864 {
865   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
866   PetscInt   *lrows;
867   PetscInt    r, len;
868   PetscBool   cong;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
873   PetscCall(MatHasCongruentLayouts(A, &cong));
874   /* fix right-hand side if needed */
875   if (x && b) {
876     const PetscScalar *xx;
877     PetscScalar       *bb;
878 
879     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
880     PetscCall(VecGetArrayRead(x, &xx));
881     PetscCall(VecGetArray(b, &bb));
882     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
883     PetscCall(VecRestoreArrayRead(x, &xx));
884     PetscCall(VecRestoreArray(b, &bb));
885   }
886 
887   if (diag != 0.0 && cong) {
888     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
889     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
890   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
891     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
892     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
893     PetscInt    nnwA, nnwB;
894     PetscBool   nnzA, nnzB;
895 
896     nnwA = aijA->nonew;
897     nnwB = aijB->nonew;
898     nnzA = aijA->keepnonzeropattern;
899     nnzB = aijB->keepnonzeropattern;
900     if (!nnzA) {
901       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
902       aijA->nonew = 0;
903     }
904     if (!nnzB) {
905       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
906       aijB->nonew = 0;
907     }
908     /* Must zero here before the next loop */
909     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
910     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
911     for (r = 0; r < len; ++r) {
912       const PetscInt row = lrows[r] + A->rmap->rstart;
913       if (row >= A->cmap->N) continue;
914       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
915     }
916     aijA->nonew = nnwA;
917     aijB->nonew = nnwB;
918   } else {
919     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
920     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
921   }
922   PetscCall(PetscFree(lrows));
923   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
924   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
928     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
929     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
930   }
931   PetscFunctionReturn(PETSC_SUCCESS);
932 }
933 
934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
935 {
936   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
937   PetscInt           n = A->rmap->n;
938   PetscInt           i, j, r, m, len = 0;
939   PetscInt          *lrows, *owners = A->rmap->range;
940   PetscMPIInt        p = 0;
941   PetscSFNode       *rrows;
942   PetscSF            sf;
943   const PetscScalar *xx;
944   PetscScalar       *bb, *mask, *aij_a;
945   Vec                xmask, lmask;
946   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
947   const PetscInt    *aj, *ii, *ridx;
948   PetscScalar       *aa;
949 
950   PetscFunctionBegin;
951   /* Create SF where leaves are input rows and roots are owned rows */
952   PetscCall(PetscMalloc1(n, &lrows));
953   for (r = 0; r < n; ++r) lrows[r] = -1;
954   PetscCall(PetscMalloc1(N, &rrows));
955   for (r = 0; r < N; ++r) {
956     const PetscInt idx = rows[r];
957     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
958     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
959       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
960     }
961     rrows[r].rank  = p;
962     rrows[r].index = rows[r] - owners[p];
963   }
964   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
965   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
966   /* Collect flags for rows to be zeroed */
967   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
968   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
969   PetscCall(PetscSFDestroy(&sf));
970   /* Compress and put in row numbers */
971   for (r = 0; r < n; ++r)
972     if (lrows[r] >= 0) lrows[len++] = r;
973   /* zero diagonal part of matrix */
974   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
975   /* handle off-diagonal part of matrix */
976   PetscCall(MatCreateVecs(A, &xmask, NULL));
977   PetscCall(VecDuplicate(l->lvec, &lmask));
978   PetscCall(VecGetArray(xmask, &bb));
979   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
980   PetscCall(VecRestoreArray(xmask, &bb));
981   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
982   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
983   PetscCall(VecDestroy(&xmask));
984   if (x && b) { /* this code is buggy when the row and column layout don't match */
985     PetscBool cong;
986 
987     PetscCall(MatHasCongruentLayouts(A, &cong));
988     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
989     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
990     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
991     PetscCall(VecGetArrayRead(l->lvec, &xx));
992     PetscCall(VecGetArray(b, &bb));
993   }
994   PetscCall(VecGetArray(lmask, &mask));
995   /* remove zeroed rows of off-diagonal matrix */
996   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
997   ii = aij->i;
998   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
999   /* loop over all elements of off process part of matrix zeroing removed columns*/
1000   if (aij->compressedrow.use) {
1001     m    = aij->compressedrow.nrows;
1002     ii   = aij->compressedrow.i;
1003     ridx = aij->compressedrow.rindex;
1004     for (i = 0; i < m; i++) {
1005       n  = ii[i + 1] - ii[i];
1006       aj = aij->j + ii[i];
1007       aa = aij_a + ii[i];
1008 
1009       for (j = 0; j < n; j++) {
1010         if (PetscAbsScalar(mask[*aj])) {
1011           if (b) bb[*ridx] -= *aa * xx[*aj];
1012           *aa = 0.0;
1013         }
1014         aa++;
1015         aj++;
1016       }
1017       ridx++;
1018     }
1019   } else { /* do not use compressed row format */
1020     m = l->B->rmap->n;
1021     for (i = 0; i < m; i++) {
1022       n  = ii[i + 1] - ii[i];
1023       aj = aij->j + ii[i];
1024       aa = aij_a + ii[i];
1025       for (j = 0; j < n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           if (b) bb[i] -= *aa * xx[*aj];
1028           *aa = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033     }
1034   }
1035   if (x && b) {
1036     PetscCall(VecRestoreArray(b, &bb));
1037     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1038   }
1039   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1040   PetscCall(VecRestoreArray(lmask, &mask));
1041   PetscCall(VecDestroy(&lmask));
1042   PetscCall(PetscFree(lrows));
1043 
1044   /* only change matrix nonzero state if pattern was allowed to be changed */
1045   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1046     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1047     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1048   }
1049   PetscFunctionReturn(PETSC_SUCCESS);
1050 }
1051 
1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1055   PetscInt    nt;
1056   VecScatter  Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   PetscCall(VecGetLocalSize(xx, &nt));
1060   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscUseTypeMethod(a->A, mult, xx, yy);
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1078 {
1079   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1080   VecScatter  Mvctx = a->Mvctx;
1081 
1082   PetscFunctionBegin;
1083   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1084   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1085   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1086   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1091 {
1092   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1093 
1094   PetscFunctionBegin;
1095   /* do nondiagonal part */
1096   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1097   /* do local part */
1098   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1099   /* add partial results together */
1100   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1101   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1106 {
1107   MPI_Comm    comm;
1108   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1109   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1110   IS          Me, Notme;
1111   PetscInt    M, N, first, last, *notme, i;
1112   PetscBool   lf;
1113   PetscMPIInt size;
1114 
1115   PetscFunctionBegin;
1116   /* Easy test: symmetric diagonal block */
1117   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1118   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1119   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1120   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1121   PetscCallMPI(MPI_Comm_size(comm, &size));
1122   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1123 
1124   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1125   PetscCall(MatGetSize(Amat, &M, &N));
1126   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1127   PetscCall(PetscMalloc1(N - last + first, &notme));
1128   for (i = 0; i < first; i++) notme[i] = i;
1129   for (i = last; i < M; i++) notme[i - last + first] = i;
1130   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1131   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1132   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1133   Aoff = Aoffs[0];
1134   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1135   Boff = Boffs[0];
1136   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1137   PetscCall(MatDestroyMatrices(1, &Aoffs));
1138   PetscCall(MatDestroyMatrices(1, &Boffs));
1139   PetscCall(ISDestroy(&Me));
1140   PetscCall(ISDestroy(&Notme));
1141   PetscCall(PetscFree(notme));
1142   PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144 
1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1146 {
1147   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1148 
1149   PetscFunctionBegin;
1150   /* do nondiagonal part */
1151   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1152   /* do local part */
1153   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1154   /* add partial results together */
1155   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1157   PetscFunctionReturn(PETSC_SUCCESS);
1158 }
1159 
1160 /*
1161   This only works correctly for square matrices where the subblock A->A is the
1162    diagonal block
1163 */
1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1165 {
1166   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1167 
1168   PetscFunctionBegin;
1169   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1170   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1171   PetscCall(MatGetDiagonal(a->A, v));
1172   PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174 
1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1176 {
1177   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(MatScale(a->A, aa));
1181   PetscCall(MatScale(a->B, aa));
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184 
1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1186 {
1187   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1188   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1189   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1190   const PetscInt    *garray = aij->garray;
1191   const PetscScalar *aa, *ba;
1192   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1193   PetscInt64         nz, hnz;
1194   PetscInt          *rowlens;
1195   PetscInt          *colidxs;
1196   PetscScalar       *matvals;
1197   PetscMPIInt        rank;
1198 
1199   PetscFunctionBegin;
1200   PetscCall(PetscViewerSetUp(viewer));
1201 
1202   M  = mat->rmap->N;
1203   N  = mat->cmap->N;
1204   m  = mat->rmap->n;
1205   rs = mat->rmap->rstart;
1206   cs = mat->cmap->rstart;
1207   nz = A->nz + B->nz;
1208 
1209   /* write matrix header */
1210   header[0] = MAT_FILE_CLASSID;
1211   header[1] = M;
1212   header[2] = N;
1213   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1214   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1215   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1216   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1217 
1218   /* fill in and store row lengths  */
1219   PetscCall(PetscMalloc1(m, &rowlens));
1220   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1221   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1222   PetscCall(PetscFree(rowlens));
1223 
1224   /* fill in and store column indices */
1225   PetscCall(PetscMalloc1(nz, &colidxs));
1226   for (cnt = 0, i = 0; i < m; i++) {
1227     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1228       if (garray[B->j[jb]] > cs) break;
1229       colidxs[cnt++] = garray[B->j[jb]];
1230     }
1231     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1232     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1233   }
1234   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1235   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1236   PetscCall(PetscFree(colidxs));
1237 
1238   /* fill in and store nonzero values */
1239   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1240   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1241   PetscCall(PetscMalloc1(nz, &matvals));
1242   for (cnt = 0, i = 0; i < m; i++) {
1243     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       matvals[cnt++] = ba[jb];
1246     }
1247     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1248     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1249   }
1250   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1251   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1252   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1254   PetscCall(PetscFree(matvals));
1255 
1256   /* write block size option to the viewer's .info file */
1257   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1258   PetscFunctionReturn(PETSC_SUCCESS);
1259 }
1260 
1261 #include <petscdraw.h>
1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1263 {
1264   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1265   PetscMPIInt       rank = aij->rank, size = aij->size;
1266   PetscBool         isdraw, iascii, isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1272   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1273   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1274   if (iascii) {
1275     PetscCall(PetscViewerGetFormat(viewer, &format));
1276     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1277       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1278       PetscCall(PetscMalloc1(size, &nz));
1279       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1280       for (i = 0; i < size; i++) {
1281         nmax = PetscMax(nmax, nz[i]);
1282         nmin = PetscMin(nmin, nz[i]);
1283         navg += nz[i];
1284       }
1285       PetscCall(PetscFree(nz));
1286       navg = navg / size;
1287       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1288       PetscFunctionReturn(PETSC_SUCCESS);
1289     }
1290     PetscCall(PetscViewerGetFormat(viewer, &format));
1291     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1292       MatInfo   info;
1293       PetscInt *inodes = NULL;
1294 
1295       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1296       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1297       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1298       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1299       if (!inodes) {
1300         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1301                                                      info.memory));
1302       } else {
1303         PetscCall(
1304           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
1305       }
1306       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1307       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1308       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1309       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1310       PetscCall(PetscViewerFlush(viewer));
1311       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1312       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1313       PetscCall(VecScatterView(aij->Mvctx, viewer));
1314       PetscFunctionReturn(PETSC_SUCCESS);
1315     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1316       PetscInt inodecount, inodelimit, *inodes;
1317       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1318       if (inodes) {
1319         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1320       } else {
1321         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1322       }
1323       PetscFunctionReturn(PETSC_SUCCESS);
1324     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1325       PetscFunctionReturn(PETSC_SUCCESS);
1326     }
1327   } else if (isbinary) {
1328     if (size == 1) {
1329       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1330       PetscCall(MatView(aij->A, viewer));
1331     } else {
1332       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1333     }
1334     PetscFunctionReturn(PETSC_SUCCESS);
1335   } else if (iascii && size == 1) {
1336     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1337     PetscCall(MatView(aij->A, viewer));
1338     PetscFunctionReturn(PETSC_SUCCESS);
1339   } else if (isdraw) {
1340     PetscDraw draw;
1341     PetscBool isnull;
1342     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1343     PetscCall(PetscDrawIsNull(draw, &isnull));
1344     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1345   }
1346 
1347   { /* assemble the entire matrix onto first processor */
1348     Mat A = NULL, Av;
1349     IS  isrow, iscol;
1350 
1351     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1352     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1353     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1354     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1355     /*  The commented code uses MatCreateSubMatrices instead */
1356     /*
1357     Mat *AA, A = NULL, Av;
1358     IS  isrow,iscol;
1359 
1360     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1361     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1362     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1363     if (rank == 0) {
1364        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1365        A    = AA[0];
1366        Av   = AA[0];
1367     }
1368     PetscCall(MatDestroySubMatrices(1,&AA));
1369 */
1370     PetscCall(ISDestroy(&iscol));
1371     PetscCall(ISDestroy(&isrow));
1372     /*
1373        Everyone has to call to draw the matrix since the graphics waits are
1374        synchronized across all processors that share the PetscDraw object
1375     */
1376     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1377     if (rank == 0) {
1378       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1379       PetscCall(MatView_SeqAIJ(Av, sviewer));
1380     }
1381     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1382     PetscCall(MatDestroy(&A));
1383   }
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1388 {
1389   PetscBool iascii, isdraw, issocket, isbinary;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1395   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1396   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1401 {
1402   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1403   Vec         bb1 = NULL;
1404   PetscBool   hasop;
1405 
1406   PetscFunctionBegin;
1407   if (flag == SOR_APPLY_UPPER) {
1408     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1409     PetscFunctionReturn(PETSC_SUCCESS);
1410   }
1411 
1412   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1413 
1414   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419 
1420     while (its--) {
1421       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1423 
1424       /* update rhs: bb1 = bb - B*x */
1425       PetscCall(VecScale(mat->lvec, -1.0));
1426       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1427 
1428       /* local sweep */
1429       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1430     }
1431   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1432     if (flag & SOR_ZERO_INITIAL_GUESS) {
1433       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1434       its--;
1435     }
1436     while (its--) {
1437       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1439 
1440       /* update rhs: bb1 = bb - B*x */
1441       PetscCall(VecScale(mat->lvec, -1.0));
1442       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1443 
1444       /* local sweep */
1445       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1446     }
1447   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1448     if (flag & SOR_ZERO_INITIAL_GUESS) {
1449       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1450       its--;
1451     }
1452     while (its--) {
1453       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1455 
1456       /* update rhs: bb1 = bb - B*x */
1457       PetscCall(VecScale(mat->lvec, -1.0));
1458       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1459 
1460       /* local sweep */
1461       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1462     }
1463   } else if (flag & SOR_EISENSTAT) {
1464     Vec xx1;
1465 
1466     PetscCall(VecDuplicate(bb, &xx1));
1467     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1468 
1469     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1470     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1471     if (!mat->diag) {
1472       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1473       PetscCall(MatGetDiagonal(matin, mat->diag));
1474     }
1475     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1476     if (hasop) {
1477       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1478     } else {
1479       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1480     }
1481     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1482 
1483     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1484 
1485     /* local sweep */
1486     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1487     PetscCall(VecAXPY(xx, 1.0, xx1));
1488     PetscCall(VecDestroy(&xx1));
1489   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1490 
1491   PetscCall(VecDestroy(&bb1));
1492 
1493   matin->factorerrortype = mat->A->factorerrortype;
1494   PetscFunctionReturn(PETSC_SUCCESS);
1495 }
1496 
1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1498 {
1499   Mat             aA, aB, Aperm;
1500   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1501   PetscScalar    *aa, *ba;
1502   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1503   PetscSF         rowsf, sf;
1504   IS              parcolp = NULL;
1505   PetscBool       done;
1506 
1507   PetscFunctionBegin;
1508   PetscCall(MatGetLocalSize(A, &m, &n));
1509   PetscCall(ISGetIndices(rowp, &rwant));
1510   PetscCall(ISGetIndices(colp, &cwant));
1511   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1512 
1513   /* Invert row permutation to find out where my rows should go */
1514   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1515   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1516   PetscCall(PetscSFSetFromOptions(rowsf));
1517   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1518   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1519   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1520 
1521   /* Invert column permutation to find out where my columns should go */
1522   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1523   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1524   PetscCall(PetscSFSetFromOptions(sf));
1525   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1526   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1527   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1528   PetscCall(PetscSFDestroy(&sf));
1529 
1530   PetscCall(ISRestoreIndices(rowp, &rwant));
1531   PetscCall(ISRestoreIndices(colp, &cwant));
1532   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1533 
1534   /* Find out where my gcols should go */
1535   PetscCall(MatGetSize(aB, NULL, &ng));
1536   PetscCall(PetscMalloc1(ng, &gcdest));
1537   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1538   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1539   PetscCall(PetscSFSetFromOptions(sf));
1540   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1541   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1542   PetscCall(PetscSFDestroy(&sf));
1543 
1544   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1545   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1546   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1547   for (i = 0; i < m; i++) {
1548     PetscInt    row = rdest[i];
1549     PetscMPIInt rowner;
1550     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1551     for (j = ai[i]; j < ai[i + 1]; j++) {
1552       PetscInt    col = cdest[aj[j]];
1553       PetscMPIInt cowner;
1554       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1555       if (rowner == cowner) dnnz[i]++;
1556       else onnz[i]++;
1557     }
1558     for (j = bi[i]; j < bi[i + 1]; j++) {
1559       PetscInt    col = gcdest[bj[j]];
1560       PetscMPIInt cowner;
1561       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1562       if (rowner == cowner) dnnz[i]++;
1563       else onnz[i]++;
1564     }
1565   }
1566   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1567   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1568   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1569   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1570   PetscCall(PetscSFDestroy(&rowsf));
1571 
1572   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1573   PetscCall(MatSeqAIJGetArray(aA, &aa));
1574   PetscCall(MatSeqAIJGetArray(aB, &ba));
1575   for (i = 0; i < m; i++) {
1576     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1577     PetscInt  j0, rowlen;
1578     rowlen = ai[i + 1] - ai[i];
1579     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1580       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1581       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1582     }
1583     rowlen = bi[i + 1] - bi[i];
1584     for (j0 = j = 0; j < rowlen; j0 = j) {
1585       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1586       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1587     }
1588   }
1589   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1590   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1591   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1592   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1593   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1594   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1595   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1596   PetscCall(PetscFree3(work, rdest, cdest));
1597   PetscCall(PetscFree(gcdest));
1598   if (parcolp) PetscCall(ISDestroy(&colp));
1599   *B = Aperm;
1600   PetscFunctionReturn(PETSC_SUCCESS);
1601 }
1602 
1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1604 {
1605   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1606 
1607   PetscFunctionBegin;
1608   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1609   if (ghosts) *ghosts = aij->garray;
1610   PetscFunctionReturn(PETSC_SUCCESS);
1611 }
1612 
1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1614 {
1615   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1616   Mat            A = mat->A, B = mat->B;
1617   PetscLogDouble isend[5], irecv[5];
1618 
1619   PetscFunctionBegin;
1620   info->block_size = 1.0;
1621   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1622 
1623   isend[0] = info->nz_used;
1624   isend[1] = info->nz_allocated;
1625   isend[2] = info->nz_unneeded;
1626   isend[3] = info->memory;
1627   isend[4] = info->mallocs;
1628 
1629   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1630 
1631   isend[0] += info->nz_used;
1632   isend[1] += info->nz_allocated;
1633   isend[2] += info->nz_unneeded;
1634   isend[3] += info->memory;
1635   isend[4] += info->mallocs;
1636   if (flag == MAT_LOCAL) {
1637     info->nz_used      = isend[0];
1638     info->nz_allocated = isend[1];
1639     info->nz_unneeded  = isend[2];
1640     info->memory       = isend[3];
1641     info->mallocs      = isend[4];
1642   } else if (flag == MAT_GLOBAL_MAX) {
1643     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1644 
1645     info->nz_used      = irecv[0];
1646     info->nz_allocated = irecv[1];
1647     info->nz_unneeded  = irecv[2];
1648     info->memory       = irecv[3];
1649     info->mallocs      = irecv[4];
1650   } else if (flag == MAT_GLOBAL_SUM) {
1651     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1652 
1653     info->nz_used      = irecv[0];
1654     info->nz_allocated = irecv[1];
1655     info->nz_unneeded  = irecv[2];
1656     info->memory       = irecv[3];
1657     info->mallocs      = irecv[4];
1658   }
1659   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1660   info->fill_ratio_needed = 0;
1661   info->factor_mallocs    = 0;
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1666 {
1667   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1668 
1669   PetscFunctionBegin;
1670   switch (op) {
1671   case MAT_NEW_NONZERO_LOCATIONS:
1672   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1673   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1674   case MAT_KEEP_NONZERO_PATTERN:
1675   case MAT_NEW_NONZERO_LOCATION_ERR:
1676   case MAT_USE_INODES:
1677   case MAT_IGNORE_ZERO_ENTRIES:
1678   case MAT_FORM_EXPLICIT_TRANSPOSE:
1679     MatCheckPreallocated(A, 1);
1680     PetscCall(MatSetOption(a->A, op, flg));
1681     PetscCall(MatSetOption(a->B, op, flg));
1682     break;
1683   case MAT_ROW_ORIENTED:
1684     MatCheckPreallocated(A, 1);
1685     a->roworiented = flg;
1686 
1687     PetscCall(MatSetOption(a->A, op, flg));
1688     PetscCall(MatSetOption(a->B, op, flg));
1689     break;
1690   case MAT_IGNORE_OFF_PROC_ENTRIES:
1691     a->donotstash = flg;
1692     break;
1693   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1694   case MAT_SPD:
1695   case MAT_SYMMETRIC:
1696   case MAT_STRUCTURALLY_SYMMETRIC:
1697   case MAT_HERMITIAN:
1698   case MAT_SYMMETRY_ETERNAL:
1699   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1700   case MAT_SPD_ETERNAL:
1701     /* if the diagonal matrix is square it inherits some of the properties above */
1702     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1703     break;
1704   case MAT_SUBMAT_SINGLEIS:
1705     A->submat_singleis = flg;
1706     break;
1707   default:
1708     break;
1709   }
1710   PetscFunctionReturn(PETSC_SUCCESS);
1711 }
1712 
1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1714 {
1715   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1716   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1717   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1718   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1719   PetscInt    *cmap, *idx_p;
1720 
1721   PetscFunctionBegin;
1722   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1723   mat->getrowactive = PETSC_TRUE;
1724 
1725   if (!mat->rowvalues && (idx || v)) {
1726     /*
1727         allocate enough space to hold information from the longest row.
1728     */
1729     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1730     PetscInt    max = 1, tmp;
1731     for (i = 0; i < matin->rmap->n; i++) {
1732       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1733       if (max < tmp) max = tmp;
1734     }
1735     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1736   }
1737 
1738   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1739   lrow = row - rstart;
1740 
1741   pvA = &vworkA;
1742   pcA = &cworkA;
1743   pvB = &vworkB;
1744   pcB = &cworkB;
1745   if (!v) {
1746     pvA = NULL;
1747     pvB = NULL;
1748   }
1749   if (!idx) {
1750     pcA = NULL;
1751     if (!v) pcB = NULL;
1752   }
1753   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1754   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1755   nztot = nzA + nzB;
1756 
1757   cmap = mat->garray;
1758   if (v || idx) {
1759     if (nztot) {
1760       /* Sort by increasing column numbers, assuming A and B already sorted */
1761       PetscInt imark = -1;
1762       if (v) {
1763         *v = v_p = mat->rowvalues;
1764         for (i = 0; i < nzB; i++) {
1765           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1766           else break;
1767         }
1768         imark = i;
1769         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1770         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1771       }
1772       if (idx) {
1773         *idx = idx_p = mat->rowindices;
1774         if (imark > -1) {
1775           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1776         } else {
1777           for (i = 0; i < nzB; i++) {
1778             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1779             else break;
1780           }
1781           imark = i;
1782         }
1783         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1784         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1785       }
1786     } else {
1787       if (idx) *idx = NULL;
1788       if (v) *v = NULL;
1789     }
1790   }
1791   *nz = nztot;
1792   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1793   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1798 {
1799   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1800 
1801   PetscFunctionBegin;
1802   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1803   aij->getrowactive = PETSC_FALSE;
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1808 {
1809   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1810   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1811   PetscInt         i, j, cstart = mat->cmap->rstart;
1812   PetscReal        sum = 0.0;
1813   const MatScalar *v, *amata, *bmata;
1814 
1815   PetscFunctionBegin;
1816   if (aij->size == 1) {
1817     PetscCall(MatNorm(aij->A, type, norm));
1818   } else {
1819     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1820     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1821     if (type == NORM_FROBENIUS) {
1822       v = amata;
1823       for (i = 0; i < amat->nz; i++) {
1824         sum += PetscRealPart(PetscConj(*v) * (*v));
1825         v++;
1826       }
1827       v = bmata;
1828       for (i = 0; i < bmat->nz; i++) {
1829         sum += PetscRealPart(PetscConj(*v) * (*v));
1830         v++;
1831       }
1832       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1833       *norm = PetscSqrtReal(*norm);
1834       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1835     } else if (type == NORM_1) { /* max column norm */
1836       PetscReal *tmp;
1837       PetscInt  *jj, *garray = aij->garray;
1838       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1839       *norm = 0.0;
1840       v     = amata;
1841       jj    = amat->j;
1842       for (j = 0; j < amat->nz; j++) {
1843         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1844         v++;
1845       }
1846       v  = bmata;
1847       jj = bmat->j;
1848       for (j = 0; j < bmat->nz; j++) {
1849         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1850         v++;
1851       }
1852       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1853       for (j = 0; j < mat->cmap->N; j++) {
1854         if (tmp[j] > *norm) *norm = tmp[j];
1855       }
1856       PetscCall(PetscFree(tmp));
1857       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1858     } else if (type == NORM_INFINITY) { /* max row norm */
1859       PetscReal ntemp = 0.0;
1860       for (j = 0; j < aij->A->rmap->n; j++) {
1861         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1862         sum = 0.0;
1863         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1864           sum += PetscAbsScalar(*v);
1865           v++;
1866         }
1867         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1868         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1869           sum += PetscAbsScalar(*v);
1870           v++;
1871         }
1872         if (sum > ntemp) ntemp = sum;
1873       }
1874       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1875       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1876     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1877     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1878     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1879   }
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
1882 
1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1884 {
1885   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1886   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1887   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1888   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1889   Mat              B, A_diag, *B_diag;
1890   const MatScalar *pbv, *bv;
1891 
1892   PetscFunctionBegin;
1893   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1894   ma = A->rmap->n;
1895   na = A->cmap->n;
1896   mb = a->B->rmap->n;
1897   nb = a->B->cmap->n;
1898   ai = Aloc->i;
1899   aj = Aloc->j;
1900   bi = Bloc->i;
1901   bj = Bloc->j;
1902   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1903     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1904     PetscSFNode         *oloc;
1905     PETSC_UNUSED PetscSF sf;
1906 
1907     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1908     /* compute d_nnz for preallocation */
1909     PetscCall(PetscArrayzero(d_nnz, na));
1910     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1911     /* compute local off-diagonal contributions */
1912     PetscCall(PetscArrayzero(g_nnz, nb));
1913     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1914     /* map those to global */
1915     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1916     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1917     PetscCall(PetscSFSetFromOptions(sf));
1918     PetscCall(PetscArrayzero(o_nnz, na));
1919     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1920     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1921     PetscCall(PetscSFDestroy(&sf));
1922 
1923     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1924     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1925     PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs));
1926     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1927     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1928     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1929   } else {
1930     B = *matout;
1931     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1932   }
1933 
1934   b           = (Mat_MPIAIJ *)B->data;
1935   A_diag      = a->A;
1936   B_diag      = &b->A;
1937   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1938   A_diag_ncol = A_diag->cmap->N;
1939   B_diag_ilen = sub_B_diag->ilen;
1940   B_diag_i    = sub_B_diag->i;
1941 
1942   /* Set ilen for diagonal of B */
1943   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1944 
1945   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1946   very quickly (=without using MatSetValues), because all writes are local. */
1947   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1948   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1949 
1950   /* copy over the B part */
1951   PetscCall(PetscMalloc1(bi[mb], &cols));
1952   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1953   pbv = bv;
1954   row = A->rmap->rstart;
1955   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1956   cols_tmp = cols;
1957   for (i = 0; i < mb; i++) {
1958     ncol = bi[i + 1] - bi[i];
1959     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1960     row++;
1961     if (pbv) pbv += ncol;
1962     if (cols_tmp) cols_tmp += ncol;
1963   }
1964   PetscCall(PetscFree(cols));
1965   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1966 
1967   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1968   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1969   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1970     *matout = B;
1971   } else {
1972     PetscCall(MatHeaderMerge(A, &B));
1973   }
1974   PetscFunctionReturn(PETSC_SUCCESS);
1975 }
1976 
1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1978 {
1979   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1980   Mat         a = aij->A, b = aij->B;
1981   PetscInt    s1, s2, s3;
1982 
1983   PetscFunctionBegin;
1984   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1985   if (rr) {
1986     PetscCall(VecGetLocalSize(rr, &s1));
1987     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1988     /* Overlap communication with computation. */
1989     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1990   }
1991   if (ll) {
1992     PetscCall(VecGetLocalSize(ll, &s1));
1993     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1994     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1995   }
1996   /* scale  the diagonal block */
1997   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1998 
1999   if (rr) {
2000     /* Do a scatter end and then right scale the off-diagonal block */
2001     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2002     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2003   }
2004   PetscFunctionReturn(PETSC_SUCCESS);
2005 }
2006 
2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2008 {
2009   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2010 
2011   PetscFunctionBegin;
2012   PetscCall(MatSetUnfactored(a->A));
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2017 {
2018   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2019   Mat         a, b, c, d;
2020   PetscBool   flg;
2021 
2022   PetscFunctionBegin;
2023   a = matA->A;
2024   b = matA->B;
2025   c = matB->A;
2026   d = matB->B;
2027 
2028   PetscCall(MatEqual(a, c, &flg));
2029   if (flg) PetscCall(MatEqual(b, d, &flg));
2030   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2035 {
2036   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2037   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2038 
2039   PetscFunctionBegin;
2040   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2041   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2042     /* because of the column compression in the off-processor part of the matrix a->B,
2043        the number of columns in a->B and b->B may be different, hence we cannot call
2044        the MatCopy() directly on the two parts. If need be, we can provide a more
2045        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2046        then copying the submatrices */
2047     PetscCall(MatCopy_Basic(A, B, str));
2048   } else {
2049     PetscCall(MatCopy(a->A, b->A, str));
2050     PetscCall(MatCopy(a->B, b->B, str));
2051   }
2052   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2053   PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055 
2056 /*
2057    Computes the number of nonzeros per row needed for preallocation when X and Y
2058    have different nonzero structure.
2059 */
2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2061 {
2062   PetscInt i, j, k, nzx, nzy;
2063 
2064   PetscFunctionBegin;
2065   /* Set the number of nonzeros in the new matrix */
2066   for (i = 0; i < m; i++) {
2067     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2068     nzx    = xi[i + 1] - xi[i];
2069     nzy    = yi[i + 1] - yi[i];
2070     nnz[i] = 0;
2071     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2072       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2073       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2074       nnz[i]++;
2075     }
2076     for (; k < nzy; k++) nnz[i]++;
2077   }
2078   PetscFunctionReturn(PETSC_SUCCESS);
2079 }
2080 
2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2083 {
2084   PetscInt    m = Y->rmap->N;
2085   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2086   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2087 
2088   PetscFunctionBegin;
2089   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2090   PetscFunctionReturn(PETSC_SUCCESS);
2091 }
2092 
2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2094 {
2095   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2096 
2097   PetscFunctionBegin;
2098   if (str == SAME_NONZERO_PATTERN) {
2099     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2100     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2101   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2102     PetscCall(MatAXPY_Basic(Y, a, X, str));
2103   } else {
2104     Mat       B;
2105     PetscInt *nnz_d, *nnz_o;
2106 
2107     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2108     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2109     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2110     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2111     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2112     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2113     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2114     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2115     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2116     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2117     PetscCall(MatHeaderMerge(Y, &B));
2118     PetscCall(PetscFree(nnz_d));
2119     PetscCall(PetscFree(nnz_o));
2120   }
2121   PetscFunctionReturn(PETSC_SUCCESS);
2122 }
2123 
2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2125 
2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2127 {
2128   PetscFunctionBegin;
2129   if (PetscDefined(USE_COMPLEX)) {
2130     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2131 
2132     PetscCall(MatConjugate_SeqAIJ(aij->A));
2133     PetscCall(MatConjugate_SeqAIJ(aij->B));
2134   }
2135   PetscFunctionReturn(PETSC_SUCCESS);
2136 }
2137 
2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2139 {
2140   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2141 
2142   PetscFunctionBegin;
2143   PetscCall(MatRealPart(a->A));
2144   PetscCall(MatRealPart(a->B));
2145   PetscFunctionReturn(PETSC_SUCCESS);
2146 }
2147 
2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2149 {
2150   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2151 
2152   PetscFunctionBegin;
2153   PetscCall(MatImaginaryPart(a->A));
2154   PetscCall(MatImaginaryPart(a->B));
2155   PetscFunctionReturn(PETSC_SUCCESS);
2156 }
2157 
2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2159 {
2160   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2161   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2162   PetscScalar       *vv;
2163   Vec                vB, vA;
2164   const PetscScalar *va, *vb;
2165 
2166   PetscFunctionBegin;
2167   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2168   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2169 
2170   PetscCall(VecGetArrayRead(vA, &va));
2171   if (idx) {
2172     for (i = 0; i < m; i++) {
2173       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2174     }
2175   }
2176 
2177   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2178   PetscCall(PetscMalloc1(m, &idxb));
2179   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2180 
2181   PetscCall(VecGetArrayWrite(v, &vv));
2182   PetscCall(VecGetArrayRead(vB, &vb));
2183   for (i = 0; i < m; i++) {
2184     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2185       vv[i] = vb[i];
2186       if (idx) idx[i] = a->garray[idxb[i]];
2187     } else {
2188       vv[i] = va[i];
2189       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2190     }
2191   }
2192   PetscCall(VecRestoreArrayWrite(v, &vv));
2193   PetscCall(VecRestoreArrayRead(vA, &va));
2194   PetscCall(VecRestoreArrayRead(vB, &vb));
2195   PetscCall(PetscFree(idxb));
2196   PetscCall(VecDestroy(&vA));
2197   PetscCall(VecDestroy(&vB));
2198   PetscFunctionReturn(PETSC_SUCCESS);
2199 }
2200 
2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2202 {
2203   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2204   Vec         vB, vA;
2205 
2206   PetscFunctionBegin;
2207   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2208   PetscCall(MatGetRowSumAbs(a->A, vA));
2209   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2210   PetscCall(MatGetRowSumAbs(a->B, vB));
2211   PetscCall(VecAXPY(vA, 1.0, vB));
2212   PetscCall(VecDestroy(&vB));
2213   PetscCall(VecCopy(vA, v));
2214   PetscCall(VecDestroy(&vA));
2215   PetscFunctionReturn(PETSC_SUCCESS);
2216 }
2217 
2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2219 {
2220   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2221   PetscInt           m = A->rmap->n, n = A->cmap->n;
2222   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2223   PetscInt          *cmap = mat->garray;
2224   PetscInt          *diagIdx, *offdiagIdx;
2225   Vec                diagV, offdiagV;
2226   PetscScalar       *a, *diagA, *offdiagA;
2227   const PetscScalar *ba, *bav;
2228   PetscInt           r, j, col, ncols, *bi, *bj;
2229   Mat                B = mat->B;
2230   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2231 
2232   PetscFunctionBegin;
2233   /* When a process holds entire A and other processes have no entry */
2234   if (A->cmap->N == n) {
2235     PetscCall(VecGetArrayWrite(v, &diagA));
2236     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2237     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2238     PetscCall(VecDestroy(&diagV));
2239     PetscCall(VecRestoreArrayWrite(v, &diagA));
2240     PetscFunctionReturn(PETSC_SUCCESS);
2241   } else if (n == 0) {
2242     if (m) {
2243       PetscCall(VecGetArrayWrite(v, &a));
2244       for (r = 0; r < m; r++) {
2245         a[r] = 0.0;
2246         if (idx) idx[r] = -1;
2247       }
2248       PetscCall(VecRestoreArrayWrite(v, &a));
2249     }
2250     PetscFunctionReturn(PETSC_SUCCESS);
2251   }
2252 
2253   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2254   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2255   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2256   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2257 
2258   /* Get offdiagIdx[] for implicit 0.0 */
2259   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2260   ba = bav;
2261   bi = b->i;
2262   bj = b->j;
2263   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2264   for (r = 0; r < m; r++) {
2265     ncols = bi[r + 1] - bi[r];
2266     if (ncols == A->cmap->N - n) { /* Brow is dense */
2267       offdiagA[r]   = *ba;
2268       offdiagIdx[r] = cmap[0];
2269     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2270       offdiagA[r] = 0.0;
2271 
2272       /* Find first hole in the cmap */
2273       for (j = 0; j < ncols; j++) {
2274         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2275         if (col > j && j < cstart) {
2276           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2277           break;
2278         } else if (col > j + n && j >= cstart) {
2279           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2280           break;
2281         }
2282       }
2283       if (j == ncols && ncols < A->cmap->N - n) {
2284         /* a hole is outside compressed Bcols */
2285         if (ncols == 0) {
2286           if (cstart) {
2287             offdiagIdx[r] = 0;
2288           } else offdiagIdx[r] = cend;
2289         } else { /* ncols > 0 */
2290           offdiagIdx[r] = cmap[ncols - 1] + 1;
2291           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2292         }
2293       }
2294     }
2295 
2296     for (j = 0; j < ncols; j++) {
2297       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2298         offdiagA[r]   = *ba;
2299         offdiagIdx[r] = cmap[*bj];
2300       }
2301       ba++;
2302       bj++;
2303     }
2304   }
2305 
2306   PetscCall(VecGetArrayWrite(v, &a));
2307   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2308   for (r = 0; r < m; ++r) {
2309     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2310       a[r] = diagA[r];
2311       if (idx) idx[r] = cstart + diagIdx[r];
2312     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2313       a[r] = diagA[r];
2314       if (idx) {
2315         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2316           idx[r] = cstart + diagIdx[r];
2317         } else idx[r] = offdiagIdx[r];
2318       }
2319     } else {
2320       a[r] = offdiagA[r];
2321       if (idx) idx[r] = offdiagIdx[r];
2322     }
2323   }
2324   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2325   PetscCall(VecRestoreArrayWrite(v, &a));
2326   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2327   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2328   PetscCall(VecDestroy(&diagV));
2329   PetscCall(VecDestroy(&offdiagV));
2330   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2331   PetscFunctionReturn(PETSC_SUCCESS);
2332 }
2333 
2334 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2335 {
2336   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2337   PetscInt           m = A->rmap->n, n = A->cmap->n;
2338   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2339   PetscInt          *cmap = mat->garray;
2340   PetscInt          *diagIdx, *offdiagIdx;
2341   Vec                diagV, offdiagV;
2342   PetscScalar       *a, *diagA, *offdiagA;
2343   const PetscScalar *ba, *bav;
2344   PetscInt           r, j, col, ncols, *bi, *bj;
2345   Mat                B = mat->B;
2346   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2347 
2348   PetscFunctionBegin;
2349   /* When a process holds entire A and other processes have no entry */
2350   if (A->cmap->N == n) {
2351     PetscCall(VecGetArrayWrite(v, &diagA));
2352     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2353     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2354     PetscCall(VecDestroy(&diagV));
2355     PetscCall(VecRestoreArrayWrite(v, &diagA));
2356     PetscFunctionReturn(PETSC_SUCCESS);
2357   } else if (n == 0) {
2358     if (m) {
2359       PetscCall(VecGetArrayWrite(v, &a));
2360       for (r = 0; r < m; r++) {
2361         a[r] = PETSC_MAX_REAL;
2362         if (idx) idx[r] = -1;
2363       }
2364       PetscCall(VecRestoreArrayWrite(v, &a));
2365     }
2366     PetscFunctionReturn(PETSC_SUCCESS);
2367   }
2368 
2369   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2370   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2371   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2372   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2373 
2374   /* Get offdiagIdx[] for implicit 0.0 */
2375   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2376   ba = bav;
2377   bi = b->i;
2378   bj = b->j;
2379   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2380   for (r = 0; r < m; r++) {
2381     ncols = bi[r + 1] - bi[r];
2382     if (ncols == A->cmap->N - n) { /* Brow is dense */
2383       offdiagA[r]   = *ba;
2384       offdiagIdx[r] = cmap[0];
2385     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2386       offdiagA[r] = 0.0;
2387 
2388       /* Find first hole in the cmap */
2389       for (j = 0; j < ncols; j++) {
2390         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2391         if (col > j && j < cstart) {
2392           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2393           break;
2394         } else if (col > j + n && j >= cstart) {
2395           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2396           break;
2397         }
2398       }
2399       if (j == ncols && ncols < A->cmap->N - n) {
2400         /* a hole is outside compressed Bcols */
2401         if (ncols == 0) {
2402           if (cstart) {
2403             offdiagIdx[r] = 0;
2404           } else offdiagIdx[r] = cend;
2405         } else { /* ncols > 0 */
2406           offdiagIdx[r] = cmap[ncols - 1] + 1;
2407           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2408         }
2409       }
2410     }
2411 
2412     for (j = 0; j < ncols; j++) {
2413       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2414         offdiagA[r]   = *ba;
2415         offdiagIdx[r] = cmap[*bj];
2416       }
2417       ba++;
2418       bj++;
2419     }
2420   }
2421 
2422   PetscCall(VecGetArrayWrite(v, &a));
2423   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2424   for (r = 0; r < m; ++r) {
2425     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2426       a[r] = diagA[r];
2427       if (idx) idx[r] = cstart + diagIdx[r];
2428     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2429       a[r] = diagA[r];
2430       if (idx) {
2431         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2432           idx[r] = cstart + diagIdx[r];
2433         } else idx[r] = offdiagIdx[r];
2434       }
2435     } else {
2436       a[r] = offdiagA[r];
2437       if (idx) idx[r] = offdiagIdx[r];
2438     }
2439   }
2440   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2441   PetscCall(VecRestoreArrayWrite(v, &a));
2442   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2443   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2444   PetscCall(VecDestroy(&diagV));
2445   PetscCall(VecDestroy(&offdiagV));
2446   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2447   PetscFunctionReturn(PETSC_SUCCESS);
2448 }
2449 
2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2451 {
2452   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2453   PetscInt           m = A->rmap->n, n = A->cmap->n;
2454   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2455   PetscInt          *cmap = mat->garray;
2456   PetscInt          *diagIdx, *offdiagIdx;
2457   Vec                diagV, offdiagV;
2458   PetscScalar       *a, *diagA, *offdiagA;
2459   const PetscScalar *ba, *bav;
2460   PetscInt           r, j, col, ncols, *bi, *bj;
2461   Mat                B = mat->B;
2462   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2463 
2464   PetscFunctionBegin;
2465   /* When a process holds entire A and other processes have no entry */
2466   if (A->cmap->N == n) {
2467     PetscCall(VecGetArrayWrite(v, &diagA));
2468     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2469     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2470     PetscCall(VecDestroy(&diagV));
2471     PetscCall(VecRestoreArrayWrite(v, &diagA));
2472     PetscFunctionReturn(PETSC_SUCCESS);
2473   } else if (n == 0) {
2474     if (m) {
2475       PetscCall(VecGetArrayWrite(v, &a));
2476       for (r = 0; r < m; r++) {
2477         a[r] = PETSC_MIN_REAL;
2478         if (idx) idx[r] = -1;
2479       }
2480       PetscCall(VecRestoreArrayWrite(v, &a));
2481     }
2482     PetscFunctionReturn(PETSC_SUCCESS);
2483   }
2484 
2485   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2486   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2487   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2488   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2489 
2490   /* Get offdiagIdx[] for implicit 0.0 */
2491   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2492   ba = bav;
2493   bi = b->i;
2494   bj = b->j;
2495   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2496   for (r = 0; r < m; r++) {
2497     ncols = bi[r + 1] - bi[r];
2498     if (ncols == A->cmap->N - n) { /* Brow is dense */
2499       offdiagA[r]   = *ba;
2500       offdiagIdx[r] = cmap[0];
2501     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2502       offdiagA[r] = 0.0;
2503 
2504       /* Find first hole in the cmap */
2505       for (j = 0; j < ncols; j++) {
2506         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2507         if (col > j && j < cstart) {
2508           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2509           break;
2510         } else if (col > j + n && j >= cstart) {
2511           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2512           break;
2513         }
2514       }
2515       if (j == ncols && ncols < A->cmap->N - n) {
2516         /* a hole is outside compressed Bcols */
2517         if (ncols == 0) {
2518           if (cstart) {
2519             offdiagIdx[r] = 0;
2520           } else offdiagIdx[r] = cend;
2521         } else { /* ncols > 0 */
2522           offdiagIdx[r] = cmap[ncols - 1] + 1;
2523           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2524         }
2525       }
2526     }
2527 
2528     for (j = 0; j < ncols; j++) {
2529       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2530         offdiagA[r]   = *ba;
2531         offdiagIdx[r] = cmap[*bj];
2532       }
2533       ba++;
2534       bj++;
2535     }
2536   }
2537 
2538   PetscCall(VecGetArrayWrite(v, &a));
2539   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2540   for (r = 0; r < m; ++r) {
2541     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2542       a[r] = diagA[r];
2543       if (idx) idx[r] = cstart + diagIdx[r];
2544     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2545       a[r] = diagA[r];
2546       if (idx) {
2547         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2548           idx[r] = cstart + diagIdx[r];
2549         } else idx[r] = offdiagIdx[r];
2550       }
2551     } else {
2552       a[r] = offdiagA[r];
2553       if (idx) idx[r] = offdiagIdx[r];
2554     }
2555   }
2556   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2557   PetscCall(VecRestoreArrayWrite(v, &a));
2558   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2559   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2560   PetscCall(VecDestroy(&diagV));
2561   PetscCall(VecDestroy(&offdiagV));
2562   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2567 {
2568   Mat *dummy;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2572   *newmat = *dummy;
2573   PetscCall(PetscFree(dummy));
2574   PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576 
2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2578 {
2579   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2580 
2581   PetscFunctionBegin;
2582   PetscCall(MatInvertBlockDiagonal(a->A, values));
2583   A->factorerrortype = a->A->factorerrortype;
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2588 {
2589   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2590 
2591   PetscFunctionBegin;
2592   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2593   PetscCall(MatSetRandom(aij->A, rctx));
2594   if (x->assembled) {
2595     PetscCall(MatSetRandom(aij->B, rctx));
2596   } else {
2597     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2598   }
2599   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2600   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2601   PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603 
2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2605 {
2606   PetscFunctionBegin;
2607   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2608   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2609   PetscFunctionReturn(PETSC_SUCCESS);
2610 }
2611 
2612 /*@
2613   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2614 
2615   Not Collective
2616 
2617   Input Parameter:
2618 . A - the matrix
2619 
2620   Output Parameter:
2621 . nz - the number of nonzeros
2622 
2623   Level: advanced
2624 
2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2626 @*/
2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2628 {
2629   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2630   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2631   PetscBool   isaij;
2632 
2633   PetscFunctionBegin;
2634   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2635   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2636   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2637   PetscFunctionReturn(PETSC_SUCCESS);
2638 }
2639 
2640 /*@
2641   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2642 
2643   Collective
2644 
2645   Input Parameters:
2646 + A  - the matrix
2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2648 
2649   Level: advanced
2650 
2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2652 @*/
2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2654 {
2655   PetscFunctionBegin;
2656   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2657   PetscFunctionReturn(PETSC_SUCCESS);
2658 }
2659 
2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject)
2661 {
2662   PetscBool sc = PETSC_FALSE, flg;
2663 
2664   PetscFunctionBegin;
2665   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2666   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2667   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2668   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2669   PetscOptionsHeadEnd();
2670   PetscFunctionReturn(PETSC_SUCCESS);
2671 }
2672 
2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2674 {
2675   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2676   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2677 
2678   PetscFunctionBegin;
2679   if (!Y->preallocated) {
2680     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2681   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2682     PetscInt nonew = aij->nonew;
2683     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2684     aij->nonew = nonew;
2685   }
2686   PetscCall(MatShift_Basic(Y, a));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2696   PetscCall(MatMissingDiagonal(a->A, missing, d));
2697   if (d) {
2698     PetscInt rstart;
2699     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2700     *d += rstart;
2701   }
2702   PetscFunctionReturn(PETSC_SUCCESS);
2703 }
2704 
2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2706 {
2707   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2708 
2709   PetscFunctionBegin;
2710   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2711   PetscFunctionReturn(PETSC_SUCCESS);
2712 }
2713 
2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2715 {
2716   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2717 
2718   PetscFunctionBegin;
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2720   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2725                                        MatGetRow_MPIAIJ,
2726                                        MatRestoreRow_MPIAIJ,
2727                                        MatMult_MPIAIJ,
2728                                        /* 4*/ MatMultAdd_MPIAIJ,
2729                                        MatMultTranspose_MPIAIJ,
2730                                        MatMultTransposeAdd_MPIAIJ,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                        /*10*/ NULL,
2735                                        NULL,
2736                                        NULL,
2737                                        MatSOR_MPIAIJ,
2738                                        MatTranspose_MPIAIJ,
2739                                        /*15*/ MatGetInfo_MPIAIJ,
2740                                        MatEqual_MPIAIJ,
2741                                        MatGetDiagonal_MPIAIJ,
2742                                        MatDiagonalScale_MPIAIJ,
2743                                        MatNorm_MPIAIJ,
2744                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2745                                        MatAssemblyEnd_MPIAIJ,
2746                                        MatSetOption_MPIAIJ,
2747                                        MatZeroEntries_MPIAIJ,
2748                                        /*24*/ MatZeroRows_MPIAIJ,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                        /*29*/ MatSetUp_MPI_Hash,
2754                                        NULL,
2755                                        NULL,
2756                                        MatGetDiagonalBlock_MPIAIJ,
2757                                        NULL,
2758                                        /*34*/ MatDuplicate_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        /*39*/ MatAXPY_MPIAIJ,
2764                                        MatCreateSubMatrices_MPIAIJ,
2765                                        MatIncreaseOverlap_MPIAIJ,
2766                                        MatGetValues_MPIAIJ,
2767                                        MatCopy_MPIAIJ,
2768                                        /*44*/ MatGetRowMax_MPIAIJ,
2769                                        MatScale_MPIAIJ,
2770                                        MatShift_MPIAIJ,
2771                                        MatDiagonalSet_MPIAIJ,
2772                                        MatZeroRowsColumns_MPIAIJ,
2773                                        /*49*/ MatSetRandom_MPIAIJ,
2774                                        MatGetRowIJ_MPIAIJ,
2775                                        MatRestoreRowIJ_MPIAIJ,
2776                                        NULL,
2777                                        NULL,
2778                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2779                                        NULL,
2780                                        MatSetUnfactored_MPIAIJ,
2781                                        MatPermute_MPIAIJ,
2782                                        NULL,
2783                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2784                                        MatDestroy_MPIAIJ,
2785                                        MatView_MPIAIJ,
2786                                        NULL,
2787                                        NULL,
2788                                        /*64*/ NULL,
2789                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2790                                        NULL,
2791                                        NULL,
2792                                        NULL,
2793                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2794                                        MatGetRowMinAbs_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                        NULL,
2799                                        /*75*/ MatFDColoringApply_AIJ,
2800                                        MatSetFromOptions_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        MatFindZeroDiagonals_MPIAIJ,
2804                                        /*80*/ NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        /*83*/ MatLoad_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        NULL,
2812                                        NULL,
2813                                        /*89*/ NULL,
2814                                        NULL,
2815                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2816                                        NULL,
2817                                        NULL,
2818                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2819                                        NULL,
2820                                        NULL,
2821                                        NULL,
2822                                        MatBindToCPU_MPIAIJ,
2823                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        MatConjugate_MPIAIJ,
2827                                        NULL,
2828                                        /*104*/ MatSetValuesRow_MPIAIJ,
2829                                        MatRealPart_MPIAIJ,
2830                                        MatImaginaryPart_MPIAIJ,
2831                                        NULL,
2832                                        NULL,
2833                                        /*109*/ NULL,
2834                                        NULL,
2835                                        MatGetRowMin_MPIAIJ,
2836                                        NULL,
2837                                        MatMissingDiagonal_MPIAIJ,
2838                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2839                                        NULL,
2840                                        MatGetGhosts_MPIAIJ,
2841                                        NULL,
2842                                        NULL,
2843                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2844                                        NULL,
2845                                        NULL,
2846                                        NULL,
2847                                        MatGetMultiProcBlock_MPIAIJ,
2848                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2849                                        MatGetColumnReductions_MPIAIJ,
2850                                        MatInvertBlockDiagonal_MPIAIJ,
2851                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2852                                        MatCreateSubMatricesMPI_MPIAIJ,
2853                                        /*129*/ NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2857                                        NULL,
2858                                        /*134*/ NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        NULL,
2862                                        NULL,
2863                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2864                                        NULL,
2865                                        NULL,
2866                                        MatFDColoringSetUp_MPIXAIJ,
2867                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2868                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2869                                        /*145*/ NULL,
2870                                        NULL,
2871                                        NULL,
2872                                        MatCreateGraph_Simple_AIJ,
2873                                        NULL,
2874                                        /*150*/ NULL,
2875                                        MatEliminateZeros_MPIAIJ,
2876                                        MatGetRowSumAbs_MPIAIJ,
2877                                        NULL,
2878                                        NULL,
2879                                        /*155*/ NULL,
2880                                        MatCopyHashToXAIJ_MPI_Hash};
2881 
2882 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2883 {
2884   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2885 
2886   PetscFunctionBegin;
2887   PetscCall(MatStoreValues(aij->A));
2888   PetscCall(MatStoreValues(aij->B));
2889   PetscFunctionReturn(PETSC_SUCCESS);
2890 }
2891 
2892 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2893 {
2894   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2895 
2896   PetscFunctionBegin;
2897   PetscCall(MatRetrieveValues(aij->A));
2898   PetscCall(MatRetrieveValues(aij->B));
2899   PetscFunctionReturn(PETSC_SUCCESS);
2900 }
2901 
2902 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2903 {
2904   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2905   PetscMPIInt size;
2906 
2907   PetscFunctionBegin;
2908   if (B->hash_active) {
2909     B->ops[0]      = b->cops;
2910     B->hash_active = PETSC_FALSE;
2911   }
2912   PetscCall(PetscLayoutSetUp(B->rmap));
2913   PetscCall(PetscLayoutSetUp(B->cmap));
2914 
2915 #if defined(PETSC_USE_CTABLE)
2916   PetscCall(PetscHMapIDestroy(&b->colmap));
2917 #else
2918   PetscCall(PetscFree(b->colmap));
2919 #endif
2920   PetscCall(PetscFree(b->garray));
2921   PetscCall(VecDestroy(&b->lvec));
2922   PetscCall(VecScatterDestroy(&b->Mvctx));
2923 
2924   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2925 
2926   MatSeqXAIJGetOptions_Private(b->B);
2927   PetscCall(MatDestroy(&b->B));
2928   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2929   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2930   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2931   PetscCall(MatSetType(b->B, MATSEQAIJ));
2932   MatSeqXAIJRestoreOptions_Private(b->B);
2933 
2934   MatSeqXAIJGetOptions_Private(b->A);
2935   PetscCall(MatDestroy(&b->A));
2936   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2937   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2938   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2939   PetscCall(MatSetType(b->A, MATSEQAIJ));
2940   MatSeqXAIJRestoreOptions_Private(b->A);
2941 
2942   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2943   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2944   B->preallocated  = PETSC_TRUE;
2945   B->was_assembled = PETSC_FALSE;
2946   B->assembled     = PETSC_FALSE;
2947   PetscFunctionReturn(PETSC_SUCCESS);
2948 }
2949 
2950 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2951 {
2952   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2953   PetscBool   ondiagreset, offdiagreset, memoryreset;
2954 
2955   PetscFunctionBegin;
2956   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2957   PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()");
2958   if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS);
2959 
2960   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset));
2961   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset));
2962   memoryreset = (PetscBool)(ondiagreset || offdiagreset);
2963   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B)));
2964   if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS);
2965 
2966   PetscCall(PetscLayoutSetUp(B->rmap));
2967   PetscCall(PetscLayoutSetUp(B->cmap));
2968   PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled");
2969   PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2970   PetscCall(VecScatterDestroy(&b->Mvctx));
2971 
2972   B->preallocated  = PETSC_TRUE;
2973   B->was_assembled = PETSC_FALSE;
2974   B->assembled     = PETSC_FALSE;
2975   /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
2976   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2977   PetscFunctionReturn(PETSC_SUCCESS);
2978 }
2979 
2980 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2981 {
2982   Mat         mat;
2983   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2984 
2985   PetscFunctionBegin;
2986   *newmat = NULL;
2987   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2988   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2989   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2990   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2991   a = (Mat_MPIAIJ *)mat->data;
2992 
2993   mat->factortype = matin->factortype;
2994   mat->assembled  = matin->assembled;
2995   mat->insertmode = NOT_SET_VALUES;
2996 
2997   a->size         = oldmat->size;
2998   a->rank         = oldmat->rank;
2999   a->donotstash   = oldmat->donotstash;
3000   a->roworiented  = oldmat->roworiented;
3001   a->rowindices   = NULL;
3002   a->rowvalues    = NULL;
3003   a->getrowactive = PETSC_FALSE;
3004 
3005   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3006   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3007   if (matin->hash_active) {
3008     PetscCall(MatSetUp(mat));
3009   } else {
3010     mat->preallocated = matin->preallocated;
3011     if (oldmat->colmap) {
3012 #if defined(PETSC_USE_CTABLE)
3013       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3014 #else
3015       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3016       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3017 #endif
3018     } else a->colmap = NULL;
3019     if (oldmat->garray) {
3020       PetscInt len;
3021       len = oldmat->B->cmap->n;
3022       PetscCall(PetscMalloc1(len + 1, &a->garray));
3023       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3024     } else a->garray = NULL;
3025 
3026     /* It may happen MatDuplicate is called with a non-assembled matrix
3027       In fact, MatDuplicate only requires the matrix to be preallocated
3028       This may happen inside a DMCreateMatrix_Shell */
3029     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3030     if (oldmat->Mvctx) {
3031       a->Mvctx = oldmat->Mvctx;
3032       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3033     }
3034     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3035     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3036   }
3037   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3038   *newmat = mat;
3039   PetscFunctionReturn(PETSC_SUCCESS);
3040 }
3041 
3042 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3043 {
3044   PetscBool isbinary, ishdf5;
3045 
3046   PetscFunctionBegin;
3047   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3048   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3049   /* force binary viewer to load .info file if it has not yet done so */
3050   PetscCall(PetscViewerSetUp(viewer));
3051   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3052   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3053   if (isbinary) {
3054     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3055   } else if (ishdf5) {
3056 #if defined(PETSC_HAVE_HDF5)
3057     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3058 #else
3059     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3060 #endif
3061   } else {
3062     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3063   }
3064   PetscFunctionReturn(PETSC_SUCCESS);
3065 }
3066 
3067 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3068 {
3069   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3070   PetscInt    *rowidxs, *colidxs;
3071   PetscScalar *matvals;
3072 
3073   PetscFunctionBegin;
3074   PetscCall(PetscViewerSetUp(viewer));
3075 
3076   /* read in matrix header */
3077   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3078   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3079   M  = header[1];
3080   N  = header[2];
3081   nz = header[3];
3082   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3083   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3084   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3085 
3086   /* set block sizes from the viewer's .info file */
3087   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3088   /* set global sizes if not set already */
3089   if (mat->rmap->N < 0) mat->rmap->N = M;
3090   if (mat->cmap->N < 0) mat->cmap->N = N;
3091   PetscCall(PetscLayoutSetUp(mat->rmap));
3092   PetscCall(PetscLayoutSetUp(mat->cmap));
3093 
3094   /* check if the matrix sizes are correct */
3095   PetscCall(MatGetSize(mat, &rows, &cols));
3096   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3097 
3098   /* read in row lengths and build row indices */
3099   PetscCall(MatGetLocalSize(mat, &m, NULL));
3100   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3101   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3102   rowidxs[0] = 0;
3103   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3104   if (nz != PETSC_INT_MAX) {
3105     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3106     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3107   }
3108 
3109   /* read in column indices and matrix values */
3110   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3111   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3112   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3113   /* store matrix indices and values */
3114   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3115   PetscCall(PetscFree(rowidxs));
3116   PetscCall(PetscFree2(colidxs, matvals));
3117   PetscFunctionReturn(PETSC_SUCCESS);
3118 }
3119 
3120 /* Not scalable because of ISAllGather() unless getting all columns. */
3121 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3122 {
3123   IS          iscol_local;
3124   PetscBool   isstride;
3125   PetscMPIInt gisstride = 0;
3126 
3127   PetscFunctionBegin;
3128   /* check if we are grabbing all columns*/
3129   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3130 
3131   if (isstride) {
3132     PetscInt start, len, mstart, mlen;
3133     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3134     PetscCall(ISGetLocalSize(iscol, &len));
3135     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3136     if (mstart == start && mlen - mstart == len) gisstride = 1;
3137   }
3138 
3139   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3140   if (gisstride) {
3141     PetscInt N;
3142     PetscCall(MatGetSize(mat, NULL, &N));
3143     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3144     PetscCall(ISSetIdentity(iscol_local));
3145     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3146   } else {
3147     PetscInt cbs;
3148     PetscCall(ISGetBlockSize(iscol, &cbs));
3149     PetscCall(ISAllGather(iscol, &iscol_local));
3150     PetscCall(ISSetBlockSize(iscol_local, cbs));
3151   }
3152 
3153   *isseq = iscol_local;
3154   PetscFunctionReturn(PETSC_SUCCESS);
3155 }
3156 
3157 /*
3158  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3159  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3160 
3161  Input Parameters:
3162 +   mat - matrix
3163 .   isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
3164            i.e., mat->rstart <= isrow[i] < mat->rend
3165 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3166            i.e., mat->cstart <= iscol[i] < mat->cend
3167 
3168  Output Parameters:
3169 +   isrow_d - sequential row index set for retrieving mat->A
3170 .   iscol_d - sequential  column index set for retrieving mat->A
3171 .   iscol_o - sequential column index set for retrieving mat->B
3172 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3173  */
3174 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3175 {
3176   Vec             x, cmap;
3177   const PetscInt *is_idx;
3178   PetscScalar    *xarray, *cmaparray;
3179   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3180   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3181   Mat             B    = a->B;
3182   Vec             lvec = a->lvec, lcmap;
3183   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3184   MPI_Comm        comm;
3185   VecScatter      Mvctx = a->Mvctx;
3186 
3187   PetscFunctionBegin;
3188   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3189   PetscCall(ISGetLocalSize(iscol, &ncols));
3190 
3191   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3192   PetscCall(MatCreateVecs(mat, &x, NULL));
3193   PetscCall(VecSet(x, -1.0));
3194   PetscCall(VecDuplicate(x, &cmap));
3195   PetscCall(VecSet(cmap, -1.0));
3196 
3197   /* Get start indices */
3198   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3199   isstart -= ncols;
3200   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3201 
3202   PetscCall(ISGetIndices(iscol, &is_idx));
3203   PetscCall(VecGetArray(x, &xarray));
3204   PetscCall(VecGetArray(cmap, &cmaparray));
3205   PetscCall(PetscMalloc1(ncols, &idx));
3206   for (i = 0; i < ncols; i++) {
3207     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3208     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3209     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3210   }
3211   PetscCall(VecRestoreArray(x, &xarray));
3212   PetscCall(VecRestoreArray(cmap, &cmaparray));
3213   PetscCall(ISRestoreIndices(iscol, &is_idx));
3214 
3215   /* Get iscol_d */
3216   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3217   PetscCall(ISGetBlockSize(iscol, &i));
3218   PetscCall(ISSetBlockSize(*iscol_d, i));
3219 
3220   /* Get isrow_d */
3221   PetscCall(ISGetLocalSize(isrow, &m));
3222   rstart = mat->rmap->rstart;
3223   PetscCall(PetscMalloc1(m, &idx));
3224   PetscCall(ISGetIndices(isrow, &is_idx));
3225   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3226   PetscCall(ISRestoreIndices(isrow, &is_idx));
3227 
3228   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3229   PetscCall(ISGetBlockSize(isrow, &i));
3230   PetscCall(ISSetBlockSize(*isrow_d, i));
3231 
3232   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3233   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3234   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3235 
3236   PetscCall(VecDuplicate(lvec, &lcmap));
3237 
3238   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3239   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3240 
3241   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3242   /* off-process column indices */
3243   count = 0;
3244   PetscCall(PetscMalloc1(Bn, &idx));
3245   PetscCall(PetscMalloc1(Bn, &cmap1));
3246 
3247   PetscCall(VecGetArray(lvec, &xarray));
3248   PetscCall(VecGetArray(lcmap, &cmaparray));
3249   for (i = 0; i < Bn; i++) {
3250     if (PetscRealPart(xarray[i]) > -1.0) {
3251       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3252       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3253       count++;
3254     }
3255   }
3256   PetscCall(VecRestoreArray(lvec, &xarray));
3257   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3258 
3259   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3260   /* cannot ensure iscol_o has same blocksize as iscol! */
3261 
3262   PetscCall(PetscFree(idx));
3263   *garray = cmap1;
3264 
3265   PetscCall(VecDestroy(&x));
3266   PetscCall(VecDestroy(&cmap));
3267   PetscCall(VecDestroy(&lcmap));
3268   PetscFunctionReturn(PETSC_SUCCESS);
3269 }
3270 
3271 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3272 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3273 {
3274   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3275   Mat         M = NULL;
3276   MPI_Comm    comm;
3277   IS          iscol_d, isrow_d, iscol_o;
3278   Mat         Asub = NULL, Bsub = NULL;
3279   PetscInt    n, count, M_size, N_size;
3280 
3281   PetscFunctionBegin;
3282   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3283 
3284   if (call == MAT_REUSE_MATRIX) {
3285     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3286     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3287     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3288 
3289     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3290     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3291 
3292     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3293     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3294 
3295     /* Update diagonal and off-diagonal portions of submat */
3296     asub = (Mat_MPIAIJ *)(*submat)->data;
3297     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3298     PetscCall(ISGetLocalSize(iscol_o, &n));
3299     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3300     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3301     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3302 
3303   } else { /* call == MAT_INITIAL_MATRIX) */
3304     PetscInt *garray, *garray_compact;
3305     PetscInt  BsubN;
3306 
3307     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3308     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3309 
3310     /* Create local submatrices Asub and Bsub */
3311     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3312     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3313 
3314     // Compact garray so its not of size Bn
3315     PetscCall(ISGetSize(iscol_o, &count));
3316     PetscCall(PetscMalloc1(count, &garray_compact));
3317     PetscCall(PetscArraycpy(garray_compact, garray, count));
3318 
3319     /* Create submatrix M */
3320     PetscCall(ISGetSize(isrow, &M_size));
3321     PetscCall(ISGetSize(iscol, &N_size));
3322     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));
3323 
3324     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3325     asub = (Mat_MPIAIJ *)M->data;
3326 
3327     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3328     n = asub->B->cmap->N;
3329     if (BsubN > n) {
3330       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3331       const PetscInt *idx;
3332       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3333       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3334 
3335       PetscCall(PetscMalloc1(n, &idx_new));
3336       j = 0;
3337       PetscCall(ISGetIndices(iscol_o, &idx));
3338       for (i = 0; i < n; i++) {
3339         if (j >= BsubN) break;
3340         while (subgarray[i] > garray[j]) j++;
3341 
3342         if (subgarray[i] == garray[j]) {
3343           idx_new[i] = idx[j++];
3344         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3345       }
3346       PetscCall(ISRestoreIndices(iscol_o, &idx));
3347 
3348       PetscCall(ISDestroy(&iscol_o));
3349       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3350 
3351     } else if (BsubN < n) {
3352       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3353     }
3354 
3355     PetscCall(PetscFree(garray));
3356     *submat = M;
3357 
3358     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3359     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3360     PetscCall(ISDestroy(&isrow_d));
3361 
3362     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3363     PetscCall(ISDestroy(&iscol_d));
3364 
3365     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3366     PetscCall(ISDestroy(&iscol_o));
3367   }
3368   PetscFunctionReturn(PETSC_SUCCESS);
3369 }
3370 
3371 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3372 {
3373   IS        iscol_local = NULL, isrow_d;
3374   PetscInt  csize;
3375   PetscInt  n, i, j, start, end;
3376   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3377   MPI_Comm  comm;
3378 
3379   PetscFunctionBegin;
3380   /* If isrow has same processor distribution as mat,
3381      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3382   if (call == MAT_REUSE_MATRIX) {
3383     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3384     if (isrow_d) {
3385       sameRowDist  = PETSC_TRUE;
3386       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3387     } else {
3388       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3389       if (iscol_local) {
3390         sameRowDist  = PETSC_TRUE;
3391         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3392       }
3393     }
3394   } else {
3395     /* Check if isrow has same processor distribution as mat */
3396     sameDist[0] = PETSC_FALSE;
3397     PetscCall(ISGetLocalSize(isrow, &n));
3398     if (!n) {
3399       sameDist[0] = PETSC_TRUE;
3400     } else {
3401       PetscCall(ISGetMinMax(isrow, &i, &j));
3402       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3403       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3404     }
3405 
3406     /* Check if iscol has same processor distribution as mat */
3407     sameDist[1] = PETSC_FALSE;
3408     PetscCall(ISGetLocalSize(iscol, &n));
3409     if (!n) {
3410       sameDist[1] = PETSC_TRUE;
3411     } else {
3412       PetscCall(ISGetMinMax(iscol, &i, &j));
3413       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3414       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3415     }
3416 
3417     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3418     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3419     sameRowDist = tsameDist[0];
3420   }
3421 
3422   if (sameRowDist) {
3423     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3424       /* isrow and iscol have same processor distribution as mat */
3425       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3426       PetscFunctionReturn(PETSC_SUCCESS);
3427     } else { /* sameRowDist */
3428       /* isrow has same processor distribution as mat */
3429       if (call == MAT_INITIAL_MATRIX) {
3430         PetscBool sorted;
3431         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3432         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3433         PetscCall(ISGetSize(iscol, &i));
3434         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3435 
3436         PetscCall(ISSorted(iscol_local, &sorted));
3437         if (sorted) {
3438           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3439           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3440           PetscFunctionReturn(PETSC_SUCCESS);
3441         }
3442       } else { /* call == MAT_REUSE_MATRIX */
3443         IS iscol_sub;
3444         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3445         if (iscol_sub) {
3446           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3447           PetscFunctionReturn(PETSC_SUCCESS);
3448         }
3449       }
3450     }
3451   }
3452 
3453   /* General case: iscol -> iscol_local which has global size of iscol */
3454   if (call == MAT_REUSE_MATRIX) {
3455     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3456     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3457   } else {
3458     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3459   }
3460 
3461   PetscCall(ISGetLocalSize(iscol, &csize));
3462   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3463 
3464   if (call == MAT_INITIAL_MATRIX) {
3465     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3466     PetscCall(ISDestroy(&iscol_local));
3467   }
3468   PetscFunctionReturn(PETSC_SUCCESS);
3469 }
3470 
3471 /*@C
3472   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3473   and "off-diagonal" part of the matrix in CSR format.
3474 
3475   Collective
3476 
3477   Input Parameters:
3478 + comm   - MPI communicator
3479 . M      - the global row size
3480 . N      - the global column size
3481 . A      - "diagonal" portion of matrix
3482 . B      - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray
3483 - garray - either `NULL` or the global index of `B` columns
3484 
3485   Output Parameter:
3486 . mat - the matrix, with input `A` as its local diagonal matrix
3487 
3488   Level: advanced
3489 
3490   Notes:
3491   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3492 
3493   `A` and `B` becomes part of output mat. The user cannot use `A` and `B` anymore.
3494 
3495 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3496 @*/
3497 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
3498 {
3499   PetscInt m, n;
3500   MatType  mpi_mat_type;
3501 
3502   PetscFunctionBegin;
3503   PetscCall(MatCreate(comm, mat));
3504   PetscCall(MatGetSize(A, &m, &n));
3505   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3506   PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3507 
3508   PetscCall(MatSetSizes(*mat, m, n, M, N));
3509   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3510   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3511   PetscCall(MatSetType(*mat, mpi_mat_type));
3512 
3513   PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3514 
3515   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3516   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3517   PetscCall(MatSetMPIAIJWithSplitSeqAIJ(*mat, A, B, garray));
3518   PetscFunctionReturn(PETSC_SUCCESS);
3519 }
3520 
3521 /*
3522   MatSetMPIAIJWithSplitSeqAIJ - Set the diag and offdiag matrices of a `MATMPIAIJ` matrix.
3523    It is similar to `MatCreateMPIAIJWithSplitArrays()`. This routine allows passing in
3524    B with local indices and the correct size, along with the accompanying
3525    garray, hence skipping compactification
3526 
3527   Collective
3528 
3529   Input Parameters:
3530 +  mat    - the MATMPIAIJ matrix, which should have its type and layout set, but should not have its diag, offdiag matrices set
3531 .  A      - the diag matrix using local col ids
3532 .  B      - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray
3533 -  garray - either `NULL` or the global index of `B` columns
3534 
3535   Output Parameter:
3536 .  mat   - the updated `MATMPIAIJ` matrix
3537 
3538   Level: advanced
3539 
3540   Notes:
3541   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3542 
3543   `A` and `B` become part of output mat. The user cannot use `A` and `B` anymore.
3544 
3545 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3546 */
3547 PETSC_INTERN PetscErrorCode MatSetMPIAIJWithSplitSeqAIJ(Mat mat, Mat A, Mat B, PetscInt *garray)
3548 {
3549   PetscFunctionBegin;
3550   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
3551   PetscInt    m, n, M, N, Am, An, Bm, Bn;
3552 
3553   PetscCall(MatGetSize(mat, &M, &N));
3554   PetscCall(MatGetLocalSize(mat, &m, &n));
3555   PetscCall(MatGetLocalSize(A, &Am, &An));
3556   PetscCall(MatGetLocalSize(B, &Bm, &Bn));
3557 
3558   PetscCheck(m == Am && m == Bm, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of rows do not match");
3559   PetscCheck(n == An, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of columns do not match");
3560   PetscCheck(!mpiaij->A && !mpiaij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "A, B of the MPIAIJ matrix are not empty");
3561   mpiaij->A      = A;
3562   mpiaij->B      = B;
3563   mpiaij->garray = garray;
3564 
3565   mat->preallocated     = PETSC_TRUE;
3566   mat->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */
3567 
3568   PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3569   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3570   /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and
3571    also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced
3572    */
3573   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3574   PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3575   PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3576   PetscFunctionReturn(PETSC_SUCCESS);
3577 }
3578 
3579 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3580 
3581 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3582 {
3583   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3584   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3585   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3586   Mat             M, Msub, B = a->B;
3587   MatScalar      *aa;
3588   Mat_SeqAIJ     *aij;
3589   PetscInt       *garray = a->garray, *colsub, Ncols;
3590   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3591   IS              iscol_sub, iscmap;
3592   const PetscInt *is_idx, *cmap;
3593   PetscBool       allcolumns = PETSC_FALSE;
3594   MPI_Comm        comm;
3595 
3596   PetscFunctionBegin;
3597   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3598   if (call == MAT_REUSE_MATRIX) {
3599     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3600     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3601     PetscCall(ISGetLocalSize(iscol_sub, &count));
3602 
3603     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3604     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3605 
3606     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3607     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3608 
3609     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3610 
3611   } else { /* call == MAT_INITIAL_MATRIX) */
3612     PetscBool flg;
3613 
3614     PetscCall(ISGetLocalSize(iscol, &n));
3615     PetscCall(ISGetSize(iscol, &Ncols));
3616 
3617     /* (1) iscol -> nonscalable iscol_local */
3618     /* Check for special case: each processor gets entire matrix columns */
3619     PetscCall(ISIdentity(iscol_local, &flg));
3620     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3621     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3622     if (allcolumns) {
3623       iscol_sub = iscol_local;
3624       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3625       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3626 
3627     } else {
3628       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3629       PetscInt *idx, *cmap1, k;
3630       PetscCall(PetscMalloc1(Ncols, &idx));
3631       PetscCall(PetscMalloc1(Ncols, &cmap1));
3632       PetscCall(ISGetIndices(iscol_local, &is_idx));
3633       count = 0;
3634       k     = 0;
3635       for (i = 0; i < Ncols; i++) {
3636         j = is_idx[i];
3637         if (j >= cstart && j < cend) {
3638           /* diagonal part of mat */
3639           idx[count]     = j;
3640           cmap1[count++] = i; /* column index in submat */
3641         } else if (Bn) {
3642           /* off-diagonal part of mat */
3643           if (j == garray[k]) {
3644             idx[count]     = j;
3645             cmap1[count++] = i; /* column index in submat */
3646           } else if (j > garray[k]) {
3647             while (j > garray[k] && k < Bn - 1) k++;
3648             if (j == garray[k]) {
3649               idx[count]     = j;
3650               cmap1[count++] = i; /* column index in submat */
3651             }
3652           }
3653         }
3654       }
3655       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3656 
3657       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3658       PetscCall(ISGetBlockSize(iscol, &cbs));
3659       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3660 
3661       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3662     }
3663 
3664     /* (3) Create sequential Msub */
3665     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3666   }
3667 
3668   PetscCall(ISGetLocalSize(iscol_sub, &count));
3669   aij = (Mat_SeqAIJ *)Msub->data;
3670   ii  = aij->i;
3671   PetscCall(ISGetIndices(iscmap, &cmap));
3672 
3673   /*
3674       m - number of local rows
3675       Ncols - number of columns (same on all processors)
3676       rstart - first row in new global matrix generated
3677   */
3678   PetscCall(MatGetSize(Msub, &m, NULL));
3679 
3680   if (call == MAT_INITIAL_MATRIX) {
3681     /* (4) Create parallel newmat */
3682     PetscMPIInt rank, size;
3683     PetscInt    csize;
3684 
3685     PetscCallMPI(MPI_Comm_size(comm, &size));
3686     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3687 
3688     /*
3689         Determine the number of non-zeros in the diagonal and off-diagonal
3690         portions of the matrix in order to do correct preallocation
3691     */
3692 
3693     /* first get start and end of "diagonal" columns */
3694     PetscCall(ISGetLocalSize(iscol, &csize));
3695     if (csize == PETSC_DECIDE) {
3696       PetscCall(ISGetSize(isrow, &mglobal));
3697       if (mglobal == Ncols) { /* square matrix */
3698         nlocal = m;
3699       } else {
3700         nlocal = Ncols / size + ((Ncols % size) > rank);
3701       }
3702     } else {
3703       nlocal = csize;
3704     }
3705     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3706     rstart = rend - nlocal;
3707     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3708 
3709     /* next, compute all the lengths */
3710     jj = aij->j;
3711     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3712     olens = dlens + m;
3713     for (i = 0; i < m; i++) {
3714       jend = ii[i + 1] - ii[i];
3715       olen = 0;
3716       dlen = 0;
3717       for (j = 0; j < jend; j++) {
3718         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3719         else dlen++;
3720         jj++;
3721       }
3722       olens[i] = olen;
3723       dlens[i] = dlen;
3724     }
3725 
3726     PetscCall(ISGetBlockSize(isrow, &bs));
3727     PetscCall(ISGetBlockSize(iscol, &cbs));
3728 
3729     PetscCall(MatCreate(comm, &M));
3730     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3731     PetscCall(MatSetBlockSizes(M, bs, cbs));
3732     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3733     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3734     PetscCall(PetscFree(dlens));
3735 
3736   } else { /* call == MAT_REUSE_MATRIX */
3737     M = *newmat;
3738     PetscCall(MatGetLocalSize(M, &i, NULL));
3739     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3740     PetscCall(MatZeroEntries(M));
3741     /*
3742          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3743        rather than the slower MatSetValues().
3744     */
3745     M->was_assembled = PETSC_TRUE;
3746     M->assembled     = PETSC_FALSE;
3747   }
3748 
3749   /* (5) Set values of Msub to *newmat */
3750   PetscCall(PetscMalloc1(count, &colsub));
3751   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3752 
3753   jj = aij->j;
3754   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3755   for (i = 0; i < m; i++) {
3756     row = rstart + i;
3757     nz  = ii[i + 1] - ii[i];
3758     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3759     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3760     jj += nz;
3761     aa += nz;
3762   }
3763   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3764   PetscCall(ISRestoreIndices(iscmap, &cmap));
3765 
3766   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3767   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3768 
3769   PetscCall(PetscFree(colsub));
3770 
3771   /* save Msub, iscol_sub and iscmap used in processor for next request */
3772   if (call == MAT_INITIAL_MATRIX) {
3773     *newmat = M;
3774     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3775     PetscCall(MatDestroy(&Msub));
3776 
3777     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3778     PetscCall(ISDestroy(&iscol_sub));
3779 
3780     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3781     PetscCall(ISDestroy(&iscmap));
3782 
3783     if (iscol_local) {
3784       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3785       PetscCall(ISDestroy(&iscol_local));
3786     }
3787   }
3788   PetscFunctionReturn(PETSC_SUCCESS);
3789 }
3790 
3791 /*
3792     Not great since it makes two copies of the submatrix, first an SeqAIJ
3793   in local and then by concatenating the local matrices the end result.
3794   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3795 
3796   This requires a sequential iscol with all indices.
3797 */
3798 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3799 {
3800   PetscMPIInt rank, size;
3801   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3802   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3803   Mat         M, Mreuse;
3804   MatScalar  *aa, *vwork;
3805   MPI_Comm    comm;
3806   Mat_SeqAIJ *aij;
3807   PetscBool   colflag, allcolumns = PETSC_FALSE;
3808 
3809   PetscFunctionBegin;
3810   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3811   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3812   PetscCallMPI(MPI_Comm_size(comm, &size));
3813 
3814   /* Check for special case: each processor gets entire matrix columns */
3815   PetscCall(ISIdentity(iscol, &colflag));
3816   PetscCall(ISGetLocalSize(iscol, &n));
3817   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3818   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3819 
3820   if (call == MAT_REUSE_MATRIX) {
3821     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3822     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3823     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3824   } else {
3825     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3826   }
3827 
3828   /*
3829       m - number of local rows
3830       n - number of columns (same on all processors)
3831       rstart - first row in new global matrix generated
3832   */
3833   PetscCall(MatGetSize(Mreuse, &m, &n));
3834   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3835   if (call == MAT_INITIAL_MATRIX) {
3836     aij = (Mat_SeqAIJ *)Mreuse->data;
3837     ii  = aij->i;
3838     jj  = aij->j;
3839 
3840     /*
3841         Determine the number of non-zeros in the diagonal and off-diagonal
3842         portions of the matrix in order to do correct preallocation
3843     */
3844 
3845     /* first get start and end of "diagonal" columns */
3846     if (csize == PETSC_DECIDE) {
3847       PetscCall(ISGetSize(isrow, &mglobal));
3848       if (mglobal == n) { /* square matrix */
3849         nlocal = m;
3850       } else {
3851         nlocal = n / size + ((n % size) > rank);
3852       }
3853     } else {
3854       nlocal = csize;
3855     }
3856     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3857     rstart = rend - nlocal;
3858     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3859 
3860     /* next, compute all the lengths */
3861     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3862     olens = dlens + m;
3863     for (i = 0; i < m; i++) {
3864       jend = ii[i + 1] - ii[i];
3865       olen = 0;
3866       dlen = 0;
3867       for (j = 0; j < jend; j++) {
3868         if (*jj < rstart || *jj >= rend) olen++;
3869         else dlen++;
3870         jj++;
3871       }
3872       olens[i] = olen;
3873       dlens[i] = dlen;
3874     }
3875     PetscCall(MatCreate(comm, &M));
3876     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3877     PetscCall(MatSetBlockSizes(M, bs, cbs));
3878     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3879     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3880     PetscCall(PetscFree(dlens));
3881   } else {
3882     PetscInt ml, nl;
3883 
3884     M = *newmat;
3885     PetscCall(MatGetLocalSize(M, &ml, &nl));
3886     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3887     PetscCall(MatZeroEntries(M));
3888     /*
3889          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3890        rather than the slower MatSetValues().
3891     */
3892     M->was_assembled = PETSC_TRUE;
3893     M->assembled     = PETSC_FALSE;
3894   }
3895   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3896   aij = (Mat_SeqAIJ *)Mreuse->data;
3897   ii  = aij->i;
3898   jj  = aij->j;
3899 
3900   /* trigger copy to CPU if needed */
3901   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3902   for (i = 0; i < m; i++) {
3903     row   = rstart + i;
3904     nz    = ii[i + 1] - ii[i];
3905     cwork = jj;
3906     jj    = PetscSafePointerPlusOffset(jj, nz);
3907     vwork = aa;
3908     aa    = PetscSafePointerPlusOffset(aa, nz);
3909     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3910   }
3911   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3912 
3913   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3914   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3915   *newmat = M;
3916 
3917   /* save submatrix used in processor for next request */
3918   if (call == MAT_INITIAL_MATRIX) {
3919     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3920     PetscCall(MatDestroy(&Mreuse));
3921   }
3922   PetscFunctionReturn(PETSC_SUCCESS);
3923 }
3924 
3925 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3926 {
3927   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3928   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3929   const PetscInt *JJ;
3930   PetscBool       nooffprocentries;
3931   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3932 
3933   PetscFunctionBegin;
3934   PetscCall(PetscLayoutSetUp(B->rmap));
3935   PetscCall(PetscLayoutSetUp(B->cmap));
3936   m       = B->rmap->n;
3937   cstart  = B->cmap->rstart;
3938   cend    = B->cmap->rend;
3939   rstart  = B->rmap->rstart;
3940   irstart = Ii[0];
3941 
3942   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3943 
3944   if (PetscDefined(USE_DEBUG)) {
3945     for (i = 0; i < m; i++) {
3946       nnz = Ii[i + 1] - Ii[i];
3947       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3948       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3949       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3950       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3951     }
3952   }
3953 
3954   for (i = 0; i < m; i++) {
3955     nnz     = Ii[i + 1] - Ii[i];
3956     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3957     nnz_max = PetscMax(nnz_max, nnz);
3958     d       = 0;
3959     for (j = 0; j < nnz; j++) {
3960       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3961     }
3962     d_nnz[i] = d;
3963     o_nnz[i] = nnz - d;
3964   }
3965   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3966   PetscCall(PetscFree2(d_nnz, o_nnz));
3967 
3968   for (i = 0; i < m; i++) {
3969     ii = i + rstart;
3970     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3971   }
3972   nooffprocentries    = B->nooffprocentries;
3973   B->nooffprocentries = PETSC_TRUE;
3974   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3975   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3976   B->nooffprocentries = nooffprocentries;
3977 
3978   /* count number of entries below block diagonal */
3979   PetscCall(PetscFree(Aij->ld));
3980   PetscCall(PetscCalloc1(m, &ld));
3981   Aij->ld = ld;
3982   for (i = 0; i < m; i++) {
3983     nnz = Ii[i + 1] - Ii[i];
3984     j   = 0;
3985     while (j < nnz && J[j] < cstart) j++;
3986     ld[i] = j;
3987     if (J) J += nnz;
3988   }
3989 
3990   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3991   PetscFunctionReturn(PETSC_SUCCESS);
3992 }
3993 
3994 /*@
3995   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3996   (the default parallel PETSc format).
3997 
3998   Collective
3999 
4000   Input Parameters:
4001 + B - the matrix
4002 . i - the indices into `j` for the start of each local row (indices start with zero)
4003 . j - the column indices for each local row (indices start with zero)
4004 - v - optional values in the matrix
4005 
4006   Level: developer
4007 
4008   Notes:
4009   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
4010   thus you CANNOT change the matrix entries by changing the values of `v` after you have
4011   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4012 
4013   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4014 
4015   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
4016 
4017   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
4018 
4019   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
4020   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4021 
4022   The format which is used for the sparse matrix input, is equivalent to a
4023   row-major ordering.. i.e for the following matrix, the input data expected is
4024   as shown
4025 .vb
4026         1 0 0
4027         2 0 3     P0
4028        -------
4029         4 5 6     P1
4030 
4031      Process0 [P0] rows_owned=[0,1]
4032         i =  {0,1,3}  [size = nrow+1  = 2+1]
4033         j =  {0,0,2}  [size = 3]
4034         v =  {1,2,3}  [size = 3]
4035 
4036      Process1 [P1] rows_owned=[2]
4037         i =  {0,3}    [size = nrow+1  = 1+1]
4038         j =  {0,1,2}  [size = 3]
4039         v =  {4,5,6}  [size = 3]
4040 .ve
4041 
4042 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4043           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4044 @*/
4045 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4046 {
4047   PetscFunctionBegin;
4048   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4049   PetscFunctionReturn(PETSC_SUCCESS);
4050 }
4051 
4052 /*@
4053   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4054   (the default parallel PETSc format).  For good matrix assembly performance
4055   the user should preallocate the matrix storage by setting the parameters
4056   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4057 
4058   Collective
4059 
4060   Input Parameters:
4061 + B     - the matrix
4062 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4063            (same value is used for all local rows)
4064 . d_nnz - array containing the number of nonzeros in the various rows of the
4065            DIAGONAL portion of the local submatrix (possibly different for each row)
4066            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4067            The size of this array is equal to the number of local rows, i.e 'm'.
4068            For matrices that will be factored, you must leave room for (and set)
4069            the diagonal entry even if it is zero.
4070 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4071            submatrix (same value is used for all local rows).
4072 - o_nnz - array containing the number of nonzeros in the various rows of the
4073            OFF-DIAGONAL portion of the local submatrix (possibly different for
4074            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4075            structure. The size of this array is equal to the number
4076            of local rows, i.e 'm'.
4077 
4078   Example Usage:
4079   Consider the following 8x8 matrix with 34 non-zero values, that is
4080   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4081   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4082   as follows
4083 
4084 .vb
4085             1  2  0  |  0  3  0  |  0  4
4086     Proc0   0  5  6  |  7  0  0  |  8  0
4087             9  0 10  | 11  0  0  | 12  0
4088     -------------------------------------
4089            13  0 14  | 15 16 17  |  0  0
4090     Proc1   0 18  0  | 19 20 21  |  0  0
4091             0  0  0  | 22 23  0  | 24  0
4092     -------------------------------------
4093     Proc2  25 26 27  |  0  0 28  | 29  0
4094            30  0  0  | 31 32 33  |  0 34
4095 .ve
4096 
4097   This can be represented as a collection of submatrices as
4098 .vb
4099       A B C
4100       D E F
4101       G H I
4102 .ve
4103 
4104   Where the submatrices A,B,C are owned by proc0, D,E,F are
4105   owned by proc1, G,H,I are owned by proc2.
4106 
4107   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4108   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4109   The 'M','N' parameters are 8,8, and have the same values on all procs.
4110 
4111   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4112   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4113   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4114   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4115   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4116   matrix, and [DF] as another `MATSEQAIJ` matrix.
4117 
4118   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4119   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4120   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4121   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4122   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4123   In this case, the values of `d_nz`, `o_nz` are
4124 .vb
4125      proc0  dnz = 2, o_nz = 2
4126      proc1  dnz = 3, o_nz = 2
4127      proc2  dnz = 1, o_nz = 4
4128 .ve
4129   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4130   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4131   for proc3. i.e we are using 12+15+10=37 storage locations to store
4132   34 values.
4133 
4134   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4135   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4136   In the above case the values for `d_nnz`, `o_nnz` are
4137 .vb
4138      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4139      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4140      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4141 .ve
4142   Here the space allocated is sum of all the above values i.e 34, and
4143   hence pre-allocation is perfect.
4144 
4145   Level: intermediate
4146 
4147   Notes:
4148   If the *_nnz parameter is given then the *_nz parameter is ignored
4149 
4150   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4151   storage.  The stored row and column indices begin with zero.
4152   See [Sparse Matrices](sec_matsparse) for details.
4153 
4154   The parallel matrix is partitioned such that the first m0 rows belong to
4155   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4156   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4157 
4158   The DIAGONAL portion of the local submatrix of a processor can be defined
4159   as the submatrix which is obtained by extraction the part corresponding to
4160   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4161   first row that belongs to the processor, r2 is the last row belonging to
4162   the this processor, and c1-c2 is range of indices of the local part of a
4163   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4164   common case of a square matrix, the row and column ranges are the same and
4165   the DIAGONAL part is also square. The remaining portion of the local
4166   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4167 
4168   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4169 
4170   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4171   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4172   You can also run with the option `-info` and look for messages with the string
4173   malloc in them to see if additional memory allocation was needed.
4174 
4175 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4176           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4177 @*/
4178 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4179 {
4180   PetscFunctionBegin;
4181   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4182   PetscValidType(B, 1);
4183   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4184   PetscFunctionReturn(PETSC_SUCCESS);
4185 }
4186 
4187 /*@
4188   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4189   CSR format for the local rows.
4190 
4191   Collective
4192 
4193   Input Parameters:
4194 + comm - MPI communicator
4195 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4196 . n    - This value should be the same as the local size used in creating the
4197          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4198          calculated if `N` is given) For square matrices n is almost always `m`.
4199 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4200 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4201 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4202 . j    - global column indices
4203 - a    - optional matrix values
4204 
4205   Output Parameter:
4206 . mat - the matrix
4207 
4208   Level: intermediate
4209 
4210   Notes:
4211   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4212   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4213   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4214 
4215   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4216 
4217   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4218 
4219   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4220   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4221 
4222   The format which is used for the sparse matrix input, is equivalent to a
4223   row-major ordering, i.e., for the following matrix, the input data expected is
4224   as shown
4225 .vb
4226         1 0 0
4227         2 0 3     P0
4228        -------
4229         4 5 6     P1
4230 
4231      Process0 [P0] rows_owned=[0,1]
4232         i =  {0,1,3}  [size = nrow+1  = 2+1]
4233         j =  {0,0,2}  [size = 3]
4234         v =  {1,2,3}  [size = 3]
4235 
4236      Process1 [P1] rows_owned=[2]
4237         i =  {0,3}    [size = nrow+1  = 1+1]
4238         j =  {0,1,2}  [size = 3]
4239         v =  {4,5,6}  [size = 3]
4240 .ve
4241 
4242 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4243           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4244 @*/
4245 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4246 {
4247   PetscFunctionBegin;
4248   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4249   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4250   PetscCall(MatCreate(comm, mat));
4251   PetscCall(MatSetSizes(*mat, m, n, M, N));
4252   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4253   PetscCall(MatSetType(*mat, MATMPIAIJ));
4254   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4255   PetscFunctionReturn(PETSC_SUCCESS);
4256 }
4257 
4258 /*@
4259   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4260   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4261   from `MatCreateMPIAIJWithArrays()`
4262 
4263   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4264 
4265   Collective
4266 
4267   Input Parameters:
4268 + mat - the matrix
4269 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4270 . n   - This value should be the same as the local size used in creating the
4271        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4272        calculated if N is given) For square matrices n is almost always m.
4273 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4274 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4275 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4276 . J   - column indices
4277 - v   - matrix values
4278 
4279   Level: deprecated
4280 
4281 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4282           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4283 @*/
4284 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4285 {
4286   PetscInt        nnz, i;
4287   PetscBool       nooffprocentries;
4288   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4289   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4290   PetscScalar    *ad, *ao;
4291   PetscInt        ldi, Iii, md;
4292   const PetscInt *Adi = Ad->i;
4293   PetscInt       *ld  = Aij->ld;
4294 
4295   PetscFunctionBegin;
4296   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4297   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4298   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4299   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4300 
4301   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4302   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4303 
4304   for (i = 0; i < m; i++) {
4305     if (PetscDefined(USE_DEBUG)) {
4306       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4307         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4308         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4309       }
4310     }
4311     nnz = Ii[i + 1] - Ii[i];
4312     Iii = Ii[i];
4313     ldi = ld[i];
4314     md  = Adi[i + 1] - Adi[i];
4315     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4316     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4317     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4318     ad += md;
4319     ao += nnz - md;
4320   }
4321   nooffprocentries      = mat->nooffprocentries;
4322   mat->nooffprocentries = PETSC_TRUE;
4323   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4324   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4325   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4326   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4327   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4328   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4329   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4330   mat->nooffprocentries = nooffprocentries;
4331   PetscFunctionReturn(PETSC_SUCCESS);
4332 }
4333 
4334 /*@
4335   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4336 
4337   Collective
4338 
4339   Input Parameters:
4340 + mat - the matrix
4341 - v   - matrix values, stored by row
4342 
4343   Level: intermediate
4344 
4345   Notes:
4346   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4347 
4348   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4349 
4350 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4351           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4352 @*/
4353 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4354 {
4355   PetscInt        nnz, i, m;
4356   PetscBool       nooffprocentries;
4357   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4358   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4359   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4360   PetscScalar    *ad, *ao;
4361   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4362   PetscInt        ldi, Iii, md;
4363   PetscInt       *ld = Aij->ld;
4364 
4365   PetscFunctionBegin;
4366   m = mat->rmap->n;
4367 
4368   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4369   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4370   Iii = 0;
4371   for (i = 0; i < m; i++) {
4372     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4373     ldi = ld[i];
4374     md  = Adi[i + 1] - Adi[i];
4375     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4376     ad += md;
4377     if (ao) {
4378       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4379       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4380       ao += nnz - md;
4381     }
4382     Iii += nnz;
4383   }
4384   nooffprocentries      = mat->nooffprocentries;
4385   mat->nooffprocentries = PETSC_TRUE;
4386   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4387   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4388   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4389   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4390   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4391   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4392   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4393   mat->nooffprocentries = nooffprocentries;
4394   PetscFunctionReturn(PETSC_SUCCESS);
4395 }
4396 
4397 /*@
4398   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4399   (the default parallel PETSc format).  For good matrix assembly performance
4400   the user should preallocate the matrix storage by setting the parameters
4401   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4402 
4403   Collective
4404 
4405   Input Parameters:
4406 + comm  - MPI communicator
4407 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4408           This value should be the same as the local size used in creating the
4409           y vector for the matrix-vector product y = Ax.
4410 . n     - This value should be the same as the local size used in creating the
4411           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4412           calculated if N is given) For square matrices n is almost always m.
4413 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4414 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4415 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4416           (same value is used for all local rows)
4417 . d_nnz - array containing the number of nonzeros in the various rows of the
4418           DIAGONAL portion of the local submatrix (possibly different for each row)
4419           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4420           The size of this array is equal to the number of local rows, i.e 'm'.
4421 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4422           submatrix (same value is used for all local rows).
4423 - o_nnz - array containing the number of nonzeros in the various rows of the
4424           OFF-DIAGONAL portion of the local submatrix (possibly different for
4425           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4426           structure. The size of this array is equal to the number
4427           of local rows, i.e 'm'.
4428 
4429   Output Parameter:
4430 . A - the matrix
4431 
4432   Options Database Keys:
4433 + -mat_no_inode                     - Do not use inodes
4434 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4435 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4436                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4437                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4438 
4439   Level: intermediate
4440 
4441   Notes:
4442   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4443   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4444   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4445 
4446   If the *_nnz parameter is given then the *_nz parameter is ignored
4447 
4448   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4449   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4450   storage requirements for this matrix.
4451 
4452   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4453   processor than it must be used on all processors that share the object for
4454   that argument.
4455 
4456   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4457   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4458 
4459   The user MUST specify either the local or global matrix dimensions
4460   (possibly both).
4461 
4462   The parallel matrix is partitioned across processors such that the
4463   first `m0` rows belong to process 0, the next `m1` rows belong to
4464   process 1, the next `m2` rows belong to process 2, etc., where
4465   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4466   values corresponding to [m x N] submatrix.
4467 
4468   The columns are logically partitioned with the n0 columns belonging
4469   to 0th partition, the next n1 columns belonging to the next
4470   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4471 
4472   The DIAGONAL portion of the local submatrix on any given processor
4473   is the submatrix corresponding to the rows and columns m,n
4474   corresponding to the given processor. i.e diagonal matrix on
4475   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4476   etc. The remaining portion of the local submatrix [m x (N-n)]
4477   constitute the OFF-DIAGONAL portion. The example below better
4478   illustrates this concept. The two matrices, the DIAGONAL portion and
4479   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4480 
4481   For a square global matrix we define each processor's diagonal portion
4482   to be its local rows and the corresponding columns (a square submatrix);
4483   each processor's off-diagonal portion encompasses the remainder of the
4484   local matrix (a rectangular submatrix).
4485 
4486   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4487 
4488   When calling this routine with a single process communicator, a matrix of
4489   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4490   type of communicator, use the construction mechanism
4491 .vb
4492   MatCreate(..., &A);
4493   MatSetType(A, MATMPIAIJ);
4494   MatSetSizes(A, m, n, M, N);
4495   MatMPIAIJSetPreallocation(A, ...);
4496 .ve
4497 
4498   By default, this format uses inodes (identical nodes) when possible.
4499   We search for consecutive rows with the same nonzero structure, thereby
4500   reusing matrix information to achieve increased efficiency.
4501 
4502   Example Usage:
4503   Consider the following 8x8 matrix with 34 non-zero values, that is
4504   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4505   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4506   as follows
4507 
4508 .vb
4509             1  2  0  |  0  3  0  |  0  4
4510     Proc0   0  5  6  |  7  0  0  |  8  0
4511             9  0 10  | 11  0  0  | 12  0
4512     -------------------------------------
4513            13  0 14  | 15 16 17  |  0  0
4514     Proc1   0 18  0  | 19 20 21  |  0  0
4515             0  0  0  | 22 23  0  | 24  0
4516     -------------------------------------
4517     Proc2  25 26 27  |  0  0 28  | 29  0
4518            30  0  0  | 31 32 33  |  0 34
4519 .ve
4520 
4521   This can be represented as a collection of submatrices as
4522 
4523 .vb
4524       A B C
4525       D E F
4526       G H I
4527 .ve
4528 
4529   Where the submatrices A,B,C are owned by proc0, D,E,F are
4530   owned by proc1, G,H,I are owned by proc2.
4531 
4532   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4533   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4534   The 'M','N' parameters are 8,8, and have the same values on all procs.
4535 
4536   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4537   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4538   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4539   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4540   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4541   matrix, and [DF] as another SeqAIJ matrix.
4542 
4543   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4544   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4545   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4546   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4547   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4548   In this case, the values of `d_nz`,`o_nz` are
4549 .vb
4550      proc0  dnz = 2, o_nz = 2
4551      proc1  dnz = 3, o_nz = 2
4552      proc2  dnz = 1, o_nz = 4
4553 .ve
4554   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4555   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4556   for proc3. i.e we are using 12+15+10=37 storage locations to store
4557   34 values.
4558 
4559   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4560   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4561   In the above case the values for d_nnz,o_nnz are
4562 .vb
4563      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4564      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4565      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4566 .ve
4567   Here the space allocated is sum of all the above values i.e 34, and
4568   hence pre-allocation is perfect.
4569 
4570 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4571           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4572           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4573 @*/
4574 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4575 {
4576   PetscMPIInt size;
4577 
4578   PetscFunctionBegin;
4579   PetscCall(MatCreate(comm, A));
4580   PetscCall(MatSetSizes(*A, m, n, M, N));
4581   PetscCallMPI(MPI_Comm_size(comm, &size));
4582   if (size > 1) {
4583     PetscCall(MatSetType(*A, MATMPIAIJ));
4584     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4585   } else {
4586     PetscCall(MatSetType(*A, MATSEQAIJ));
4587     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4588   }
4589   PetscFunctionReturn(PETSC_SUCCESS);
4590 }
4591 
4592 /*@C
4593   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4594 
4595   Not Collective
4596 
4597   Input Parameter:
4598 . A - The `MATMPIAIJ` matrix
4599 
4600   Output Parameters:
4601 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4602 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4603 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4604 
4605   Level: intermediate
4606 
4607   Note:
4608   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4609   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is
4610   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4611   local column numbers to global column numbers in the original matrix.
4612 
4613 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4614 @*/
4615 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4616 {
4617   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4618   PetscBool   flg;
4619 
4620   PetscFunctionBegin;
4621   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4622   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4623   if (Ad) *Ad = a->A;
4624   if (Ao) *Ao = a->B;
4625   if (colmap) *colmap = a->garray;
4626   PetscFunctionReturn(PETSC_SUCCESS);
4627 }
4628 
4629 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4630 {
4631   PetscInt     m, N, i, rstart, nnz, Ii;
4632   PetscInt    *indx;
4633   PetscScalar *values;
4634   MatType      rootType;
4635 
4636   PetscFunctionBegin;
4637   PetscCall(MatGetSize(inmat, &m, &N));
4638   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4639     PetscInt *dnz, *onz, sum, bs, cbs;
4640 
4641     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4642     /* Check sum(n) = N */
4643     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4644     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4645 
4646     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4647     rstart -= m;
4648 
4649     MatPreallocateBegin(comm, m, n, dnz, onz);
4650     for (i = 0; i < m; i++) {
4651       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4652       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4653       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4654     }
4655 
4656     PetscCall(MatCreate(comm, outmat));
4657     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4658     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4659     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4660     PetscCall(MatGetRootType_Private(inmat, &rootType));
4661     PetscCall(MatSetType(*outmat, rootType));
4662     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4663     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4664     MatPreallocateEnd(dnz, onz);
4665     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4666   }
4667 
4668   /* numeric phase */
4669   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4670   for (i = 0; i < m; i++) {
4671     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4672     Ii = i + rstart;
4673     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4674     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4675   }
4676   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4677   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4678   PetscFunctionReturn(PETSC_SUCCESS);
4679 }
4680 
4681 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
4682 {
4683   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;
4684 
4685   PetscFunctionBegin;
4686   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4687   PetscCall(PetscFree(merge->id_r));
4688   PetscCall(PetscFree(merge->len_s));
4689   PetscCall(PetscFree(merge->len_r));
4690   PetscCall(PetscFree(merge->bi));
4691   PetscCall(PetscFree(merge->bj));
4692   PetscCall(PetscFree(merge->buf_ri[0]));
4693   PetscCall(PetscFree(merge->buf_ri));
4694   PetscCall(PetscFree(merge->buf_rj[0]));
4695   PetscCall(PetscFree(merge->buf_rj));
4696   PetscCall(PetscFree(merge->coi));
4697   PetscCall(PetscFree(merge->coj));
4698   PetscCall(PetscFree(merge->owners_co));
4699   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4700   PetscCall(PetscFree(merge));
4701   PetscFunctionReturn(PETSC_SUCCESS);
4702 }
4703 
4704 #include <../src/mat/utils/freespace.h>
4705 #include <petscbt.h>
4706 
4707 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4708 {
4709   MPI_Comm             comm;
4710   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4711   PetscMPIInt          size, rank, taga, *len_s;
4712   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4713   PetscMPIInt          proc, k;
4714   PetscInt           **buf_ri, **buf_rj;
4715   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4716   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4717   MPI_Request         *s_waits, *r_waits;
4718   MPI_Status          *status;
4719   const MatScalar     *aa, *a_a;
4720   MatScalar          **abuf_r, *ba_i;
4721   Mat_Merge_SeqsToMPI *merge;
4722   PetscContainer       container;
4723 
4724   PetscFunctionBegin;
4725   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4726   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4727 
4728   PetscCallMPI(MPI_Comm_size(comm, &size));
4729   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4730 
4731   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4732   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4733   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4734   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4735   aa = a_a;
4736 
4737   bi     = merge->bi;
4738   bj     = merge->bj;
4739   buf_ri = merge->buf_ri;
4740   buf_rj = merge->buf_rj;
4741 
4742   PetscCall(PetscMalloc1(size, &status));
4743   owners = merge->rowmap->range;
4744   len_s  = merge->len_s;
4745 
4746   /* send and recv matrix values */
4747   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4748   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4749 
4750   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4751   for (proc = 0, k = 0; proc < size; proc++) {
4752     if (!len_s[proc]) continue;
4753     i = owners[proc];
4754     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4755     k++;
4756   }
4757 
4758   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4759   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4760   PetscCall(PetscFree(status));
4761 
4762   PetscCall(PetscFree(s_waits));
4763   PetscCall(PetscFree(r_waits));
4764 
4765   /* insert mat values of mpimat */
4766   PetscCall(PetscMalloc1(N, &ba_i));
4767   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4768 
4769   for (k = 0; k < merge->nrecv; k++) {
4770     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4771     nrows       = *buf_ri_k[k];
4772     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4773     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4774   }
4775 
4776   /* set values of ba */
4777   m = merge->rowmap->n;
4778   for (i = 0; i < m; i++) {
4779     arow = owners[rank] + i;
4780     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4781     bnzi = bi[i + 1] - bi[i];
4782     PetscCall(PetscArrayzero(ba_i, bnzi));
4783 
4784     /* add local non-zero vals of this proc's seqmat into ba */
4785     anzi   = ai[arow + 1] - ai[arow];
4786     aj     = a->j + ai[arow];
4787     aa     = a_a + ai[arow];
4788     nextaj = 0;
4789     for (j = 0; nextaj < anzi; j++) {
4790       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4791         ba_i[j] += aa[nextaj++];
4792       }
4793     }
4794 
4795     /* add received vals into ba */
4796     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4797       /* i-th row */
4798       if (i == *nextrow[k]) {
4799         anzi   = *(nextai[k] + 1) - *nextai[k];
4800         aj     = buf_rj[k] + *nextai[k];
4801         aa     = abuf_r[k] + *nextai[k];
4802         nextaj = 0;
4803         for (j = 0; nextaj < anzi; j++) {
4804           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4805             ba_i[j] += aa[nextaj++];
4806           }
4807         }
4808         nextrow[k]++;
4809         nextai[k]++;
4810       }
4811     }
4812     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4813   }
4814   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4815   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4816   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4817 
4818   PetscCall(PetscFree(abuf_r[0]));
4819   PetscCall(PetscFree(abuf_r));
4820   PetscCall(PetscFree(ba_i));
4821   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4822   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4823   PetscFunctionReturn(PETSC_SUCCESS);
4824 }
4825 
4826 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4827 {
4828   Mat                  B_mpi;
4829   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4830   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4831   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4832   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4833   PetscInt             len, *dnz, *onz, bs, cbs;
4834   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4835   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4836   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4837   MPI_Status          *status;
4838   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4839   PetscBT              lnkbt;
4840   Mat_Merge_SeqsToMPI *merge;
4841   PetscContainer       container;
4842 
4843   PetscFunctionBegin;
4844   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4845 
4846   /* make sure it is a PETSc comm */
4847   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4848   PetscCallMPI(MPI_Comm_size(comm, &size));
4849   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4850 
4851   PetscCall(PetscNew(&merge));
4852   PetscCall(PetscMalloc1(size, &status));
4853 
4854   /* determine row ownership */
4855   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4856   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4857   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4858   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4859   PetscCall(PetscLayoutSetUp(merge->rowmap));
4860   PetscCall(PetscMalloc1(size, &len_si));
4861   PetscCall(PetscMalloc1(size, &merge->len_s));
4862 
4863   m      = merge->rowmap->n;
4864   owners = merge->rowmap->range;
4865 
4866   /* determine the number of messages to send, their lengths */
4867   len_s = merge->len_s;
4868 
4869   len          = 0; /* length of buf_si[] */
4870   merge->nsend = 0;
4871   for (PetscMPIInt proc = 0; proc < size; proc++) {
4872     len_si[proc] = 0;
4873     if (proc == rank) {
4874       len_s[proc] = 0;
4875     } else {
4876       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4877       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4878     }
4879     if (len_s[proc]) {
4880       merge->nsend++;
4881       nrows = 0;
4882       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4883         if (ai[i + 1] > ai[i]) nrows++;
4884       }
4885       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4886       len += len_si[proc];
4887     }
4888   }
4889 
4890   /* determine the number and length of messages to receive for ij-structure */
4891   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4892   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4893 
4894   /* post the Irecv of j-structure */
4895   PetscCall(PetscCommGetNewTag(comm, &tagj));
4896   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4897 
4898   /* post the Isend of j-structure */
4899   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4900 
4901   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4902     if (!len_s[proc]) continue;
4903     i = owners[proc];
4904     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4905     k++;
4906   }
4907 
4908   /* receives and sends of j-structure are complete */
4909   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4910   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4911 
4912   /* send and recv i-structure */
4913   PetscCall(PetscCommGetNewTag(comm, &tagi));
4914   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4915 
4916   PetscCall(PetscMalloc1(len + 1, &buf_s));
4917   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4918   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4919     if (!len_s[proc]) continue;
4920     /* form outgoing message for i-structure:
4921          buf_si[0]:                 nrows to be sent
4922                [1:nrows]:           row index (global)
4923                [nrows+1:2*nrows+1]: i-structure index
4924     */
4925     nrows       = len_si[proc] / 2 - 1;
4926     buf_si_i    = buf_si + nrows + 1;
4927     buf_si[0]   = nrows;
4928     buf_si_i[0] = 0;
4929     nrows       = 0;
4930     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4931       anzi = ai[i + 1] - ai[i];
4932       if (anzi) {
4933         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4934         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4935         nrows++;
4936       }
4937     }
4938     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4939     k++;
4940     buf_si += len_si[proc];
4941   }
4942 
4943   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4944   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4945 
4946   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4947   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4948 
4949   PetscCall(PetscFree(len_si));
4950   PetscCall(PetscFree(len_ri));
4951   PetscCall(PetscFree(rj_waits));
4952   PetscCall(PetscFree2(si_waits, sj_waits));
4953   PetscCall(PetscFree(ri_waits));
4954   PetscCall(PetscFree(buf_s));
4955   PetscCall(PetscFree(status));
4956 
4957   /* compute a local seq matrix in each processor */
4958   /* allocate bi array and free space for accumulating nonzero column info */
4959   PetscCall(PetscMalloc1(m + 1, &bi));
4960   bi[0] = 0;
4961 
4962   /* create and initialize a linked list */
4963   nlnk = N + 1;
4964   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4965 
4966   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4967   len = ai[owners[rank + 1]] - ai[owners[rank]];
4968   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4969 
4970   current_space = free_space;
4971 
4972   /* determine symbolic info for each local row */
4973   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4974 
4975   for (k = 0; k < merge->nrecv; k++) {
4976     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4977     nrows       = *buf_ri_k[k];
4978     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4979     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4980   }
4981 
4982   MatPreallocateBegin(comm, m, n, dnz, onz);
4983   len = 0;
4984   for (i = 0; i < m; i++) {
4985     bnzi = 0;
4986     /* add local non-zero cols of this proc's seqmat into lnk */
4987     arow = owners[rank] + i;
4988     anzi = ai[arow + 1] - ai[arow];
4989     aj   = a->j + ai[arow];
4990     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4991     bnzi += nlnk;
4992     /* add received col data into lnk */
4993     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4994       if (i == *nextrow[k]) {            /* i-th row */
4995         anzi = *(nextai[k] + 1) - *nextai[k];
4996         aj   = buf_rj[k] + *nextai[k];
4997         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4998         bnzi += nlnk;
4999         nextrow[k]++;
5000         nextai[k]++;
5001       }
5002     }
5003     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5004 
5005     /* if free space is not available, make more free space */
5006     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5007     /* copy data into free space, then initialize lnk */
5008     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5009     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5010 
5011     current_space->array += bnzi;
5012     current_space->local_used += bnzi;
5013     current_space->local_remaining -= bnzi;
5014 
5015     bi[i + 1] = bi[i] + bnzi;
5016   }
5017 
5018   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5019 
5020   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5021   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5022   PetscCall(PetscLLDestroy(lnk, lnkbt));
5023 
5024   /* create symbolic parallel matrix B_mpi */
5025   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5026   PetscCall(MatCreate(comm, &B_mpi));
5027   if (n == PETSC_DECIDE) {
5028     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5029   } else {
5030     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5031   }
5032   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5033   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5034   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5035   MatPreallocateEnd(dnz, onz);
5036   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5037 
5038   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5039   B_mpi->assembled = PETSC_FALSE;
5040   merge->bi        = bi;
5041   merge->bj        = bj;
5042   merge->buf_ri    = buf_ri;
5043   merge->buf_rj    = buf_rj;
5044   merge->coi       = NULL;
5045   merge->coj       = NULL;
5046   merge->owners_co = NULL;
5047 
5048   PetscCall(PetscCommDestroy(&comm));
5049 
5050   /* attach the supporting struct to B_mpi for reuse */
5051   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5052   PetscCall(PetscContainerSetPointer(container, merge));
5053   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5054   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5055   PetscCall(PetscContainerDestroy(&container));
5056   *mpimat = B_mpi;
5057 
5058   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5059   PetscFunctionReturn(PETSC_SUCCESS);
5060 }
5061 
5062 /*@
5063   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5064   matrices from each processor
5065 
5066   Collective
5067 
5068   Input Parameters:
5069 + comm   - the communicators the parallel matrix will live on
5070 . seqmat - the input sequential matrices
5071 . m      - number of local rows (or `PETSC_DECIDE`)
5072 . n      - number of local columns (or `PETSC_DECIDE`)
5073 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5074 
5075   Output Parameter:
5076 . mpimat - the parallel matrix generated
5077 
5078   Level: advanced
5079 
5080   Note:
5081   The dimensions of the sequential matrix in each processor MUST be the same.
5082   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5083   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5084 
5085 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5086 @*/
5087 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5088 {
5089   PetscMPIInt size;
5090 
5091   PetscFunctionBegin;
5092   PetscCallMPI(MPI_Comm_size(comm, &size));
5093   if (size == 1) {
5094     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5095     if (scall == MAT_INITIAL_MATRIX) {
5096       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5097     } else {
5098       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5099     }
5100     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5101     PetscFunctionReturn(PETSC_SUCCESS);
5102   }
5103   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5104   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5105   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5106   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5107   PetscFunctionReturn(PETSC_SUCCESS);
5108 }
5109 
5110 /*@
5111   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5112 
5113   Not Collective
5114 
5115   Input Parameter:
5116 . A - the matrix
5117 
5118   Output Parameter:
5119 . A_loc - the local sequential matrix generated
5120 
5121   Level: developer
5122 
5123   Notes:
5124   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5125   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5126   `n` is the global column count obtained with `MatGetSize()`
5127 
5128   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5129 
5130   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5131 
5132   Destroy the matrix with `MatDestroy()`
5133 
5134 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5135 @*/
5136 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5137 {
5138   PetscBool mpi;
5139 
5140   PetscFunctionBegin;
5141   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5142   if (mpi) {
5143     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5144   } else {
5145     *A_loc = A;
5146     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5147   }
5148   PetscFunctionReturn(PETSC_SUCCESS);
5149 }
5150 
5151 /*@
5152   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5153 
5154   Not Collective
5155 
5156   Input Parameters:
5157 + A     - the matrix
5158 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5159 
5160   Output Parameter:
5161 . A_loc - the local sequential matrix generated
5162 
5163   Level: developer
5164 
5165   Notes:
5166   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5167   matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
5168   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5169 
5170   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5171 
5172   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5173   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5174   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5175   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5176 
5177 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5178 @*/
5179 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5180 {
5181   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5182   Mat_SeqAIJ        *mat, *a, *b;
5183   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5184   const PetscScalar *aa, *ba, *aav, *bav;
5185   PetscScalar       *ca, *cam;
5186   PetscMPIInt        size;
5187   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5188   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5189   PetscBool          match;
5190 
5191   PetscFunctionBegin;
5192   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5193   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5194   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5195   if (size == 1) {
5196     if (scall == MAT_INITIAL_MATRIX) {
5197       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5198       *A_loc = mpimat->A;
5199     } else if (scall == MAT_REUSE_MATRIX) {
5200       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5201     }
5202     PetscFunctionReturn(PETSC_SUCCESS);
5203   }
5204 
5205   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5206   a  = (Mat_SeqAIJ *)mpimat->A->data;
5207   b  = (Mat_SeqAIJ *)mpimat->B->data;
5208   ai = a->i;
5209   aj = a->j;
5210   bi = b->i;
5211   bj = b->j;
5212   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5213   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5214   aa = aav;
5215   ba = bav;
5216   if (scall == MAT_INITIAL_MATRIX) {
5217     PetscCall(PetscMalloc1(1 + am, &ci));
5218     ci[0] = 0;
5219     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5220     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5221     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5222     k = 0;
5223     for (i = 0; i < am; i++) {
5224       ncols_o = bi[i + 1] - bi[i];
5225       ncols_d = ai[i + 1] - ai[i];
5226       /* off-diagonal portion of A */
5227       for (jo = 0; jo < ncols_o; jo++) {
5228         col = cmap[*bj];
5229         if (col >= cstart) break;
5230         cj[k] = col;
5231         bj++;
5232         ca[k++] = *ba++;
5233       }
5234       /* diagonal portion of A */
5235       for (j = 0; j < ncols_d; j++) {
5236         cj[k]   = cstart + *aj++;
5237         ca[k++] = *aa++;
5238       }
5239       /* off-diagonal portion of A */
5240       for (j = jo; j < ncols_o; j++) {
5241         cj[k]   = cmap[*bj++];
5242         ca[k++] = *ba++;
5243       }
5244     }
5245     /* put together the new matrix */
5246     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5247     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5248     /* Since these are PETSc arrays, change flags to free them as necessary. */
5249     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5250     mat->free_a  = PETSC_TRUE;
5251     mat->free_ij = PETSC_TRUE;
5252     mat->nonew   = 0;
5253   } else if (scall == MAT_REUSE_MATRIX) {
5254     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5255     ci  = mat->i;
5256     cj  = mat->j;
5257     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5258     for (i = 0; i < am; i++) {
5259       /* off-diagonal portion of A */
5260       ncols_o = bi[i + 1] - bi[i];
5261       for (jo = 0; jo < ncols_o; jo++) {
5262         col = cmap[*bj];
5263         if (col >= cstart) break;
5264         *cam++ = *ba++;
5265         bj++;
5266       }
5267       /* diagonal portion of A */
5268       ncols_d = ai[i + 1] - ai[i];
5269       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5270       /* off-diagonal portion of A */
5271       for (j = jo; j < ncols_o; j++) {
5272         *cam++ = *ba++;
5273         bj++;
5274       }
5275     }
5276     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5277   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5278   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5279   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5280   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5281   PetscFunctionReturn(PETSC_SUCCESS);
5282 }
5283 
5284 /*@
5285   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5286   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5287 
5288   Not Collective
5289 
5290   Input Parameters:
5291 + A     - the matrix
5292 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5293 
5294   Output Parameters:
5295 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5296 - A_loc - the local sequential matrix generated
5297 
5298   Level: developer
5299 
5300   Note:
5301   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5302   part, then those associated with the off-diagonal part (in its local ordering)
5303 
5304 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5305 @*/
5306 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5307 {
5308   Mat             Ao, Ad;
5309   const PetscInt *cmap;
5310   PetscMPIInt     size;
5311   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5312 
5313   PetscFunctionBegin;
5314   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5315   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5316   if (size == 1) {
5317     if (scall == MAT_INITIAL_MATRIX) {
5318       PetscCall(PetscObjectReference((PetscObject)Ad));
5319       *A_loc = Ad;
5320     } else if (scall == MAT_REUSE_MATRIX) {
5321       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5322     }
5323     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5324     PetscFunctionReturn(PETSC_SUCCESS);
5325   }
5326   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5327   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5328   if (f) {
5329     PetscCall((*f)(A, scall, glob, A_loc));
5330   } else {
5331     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5332     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5333     Mat_SeqAIJ        *c;
5334     PetscInt          *ai = a->i, *aj = a->j;
5335     PetscInt          *bi = b->i, *bj = b->j;
5336     PetscInt          *ci, *cj;
5337     const PetscScalar *aa, *ba;
5338     PetscScalar       *ca;
5339     PetscInt           i, j, am, dn, on;
5340 
5341     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5342     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5343     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5344     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5345     if (scall == MAT_INITIAL_MATRIX) {
5346       PetscInt k;
5347       PetscCall(PetscMalloc1(1 + am, &ci));
5348       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5349       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5350       ci[0] = 0;
5351       for (i = 0, k = 0; i < am; i++) {
5352         const PetscInt ncols_o = bi[i + 1] - bi[i];
5353         const PetscInt ncols_d = ai[i + 1] - ai[i];
5354         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5355         /* diagonal portion of A */
5356         for (j = 0; j < ncols_d; j++, k++) {
5357           cj[k] = *aj++;
5358           ca[k] = *aa++;
5359         }
5360         /* off-diagonal portion of A */
5361         for (j = 0; j < ncols_o; j++, k++) {
5362           cj[k] = dn + *bj++;
5363           ca[k] = *ba++;
5364         }
5365       }
5366       /* put together the new matrix */
5367       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5368       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5369       /* Since these are PETSc arrays, change flags to free them as necessary. */
5370       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5371       c->free_a  = PETSC_TRUE;
5372       c->free_ij = PETSC_TRUE;
5373       c->nonew   = 0;
5374       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5375     } else if (scall == MAT_REUSE_MATRIX) {
5376       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5377       for (i = 0; i < am; i++) {
5378         const PetscInt ncols_d = ai[i + 1] - ai[i];
5379         const PetscInt ncols_o = bi[i + 1] - bi[i];
5380         /* diagonal portion of A */
5381         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5382         /* off-diagonal portion of A */
5383         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5384       }
5385       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5386     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5387     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5388     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5389     if (glob) {
5390       PetscInt cst, *gidx;
5391 
5392       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5393       PetscCall(PetscMalloc1(dn + on, &gidx));
5394       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5395       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5396       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5397     }
5398   }
5399   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5400   PetscFunctionReturn(PETSC_SUCCESS);
5401 }
5402 
5403 /*@C
5404   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5405 
5406   Not Collective
5407 
5408   Input Parameters:
5409 + A     - the matrix
5410 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5411 . row   - index set of rows to extract (or `NULL`)
5412 - col   - index set of columns to extract (or `NULL`)
5413 
5414   Output Parameter:
5415 . A_loc - the local sequential matrix generated
5416 
5417   Level: developer
5418 
5419 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5420 @*/
5421 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5422 {
5423   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5424   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5425   IS          isrowa, iscola;
5426   Mat        *aloc;
5427   PetscBool   match;
5428 
5429   PetscFunctionBegin;
5430   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5431   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5432   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5433   if (!row) {
5434     start = A->rmap->rstart;
5435     end   = A->rmap->rend;
5436     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5437   } else {
5438     isrowa = *row;
5439   }
5440   if (!col) {
5441     start = A->cmap->rstart;
5442     cmap  = a->garray;
5443     nzA   = a->A->cmap->n;
5444     nzB   = a->B->cmap->n;
5445     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5446     ncols = 0;
5447     for (i = 0; i < nzB; i++) {
5448       if (cmap[i] < start) idx[ncols++] = cmap[i];
5449       else break;
5450     }
5451     imark = i;
5452     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5453     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5454     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5455   } else {
5456     iscola = *col;
5457   }
5458   if (scall != MAT_INITIAL_MATRIX) {
5459     PetscCall(PetscMalloc1(1, &aloc));
5460     aloc[0] = *A_loc;
5461   }
5462   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5463   if (!col) { /* attach global id of condensed columns */
5464     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5465   }
5466   *A_loc = aloc[0];
5467   PetscCall(PetscFree(aloc));
5468   if (!row) PetscCall(ISDestroy(&isrowa));
5469   if (!col) PetscCall(ISDestroy(&iscola));
5470   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5471   PetscFunctionReturn(PETSC_SUCCESS);
5472 }
5473 
5474 /*
5475  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5476  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5477  * on a global size.
5478  * */
5479 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5480 {
5481   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5482   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5483   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5484   PetscMPIInt            owner;
5485   PetscSFNode           *iremote, *oiremote;
5486   const PetscInt        *lrowindices;
5487   PetscSF                sf, osf;
5488   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5489   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5490   MPI_Comm               comm;
5491   ISLocalToGlobalMapping mapping;
5492   const PetscScalar     *pd_a, *po_a;
5493 
5494   PetscFunctionBegin;
5495   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5496   /* plocalsize is the number of roots
5497    * nrows is the number of leaves
5498    * */
5499   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5500   PetscCall(ISGetLocalSize(rows, &nrows));
5501   PetscCall(PetscCalloc1(nrows, &iremote));
5502   PetscCall(ISGetIndices(rows, &lrowindices));
5503   for (i = 0; i < nrows; i++) {
5504     /* Find a remote index and an owner for a row
5505      * The row could be local or remote
5506      * */
5507     owner = 0;
5508     lidx  = 0;
5509     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5510     iremote[i].index = lidx;
5511     iremote[i].rank  = owner;
5512   }
5513   /* Create SF to communicate how many nonzero columns for each row */
5514   PetscCall(PetscSFCreate(comm, &sf));
5515   /* SF will figure out the number of nonzero columns for each row, and their
5516    * offsets
5517    * */
5518   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5519   PetscCall(PetscSFSetFromOptions(sf));
5520   PetscCall(PetscSFSetUp(sf));
5521 
5522   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5523   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5524   PetscCall(PetscCalloc1(nrows, &pnnz));
5525   roffsets[0] = 0;
5526   roffsets[1] = 0;
5527   for (i = 0; i < plocalsize; i++) {
5528     /* diagonal */
5529     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5530     /* off-diagonal */
5531     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5532     /* compute offsets so that we relative location for each row */
5533     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5534     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5535   }
5536   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5537   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5538   /* 'r' means root, and 'l' means leaf */
5539   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5540   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5541   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5542   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5543   PetscCall(PetscSFDestroy(&sf));
5544   PetscCall(PetscFree(roffsets));
5545   PetscCall(PetscFree(nrcols));
5546   dntotalcols = 0;
5547   ontotalcols = 0;
5548   ncol        = 0;
5549   for (i = 0; i < nrows; i++) {
5550     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5551     ncol    = PetscMax(pnnz[i], ncol);
5552     /* diagonal */
5553     dntotalcols += nlcols[i * 2 + 0];
5554     /* off-diagonal */
5555     ontotalcols += nlcols[i * 2 + 1];
5556   }
5557   /* We do not need to figure the right number of columns
5558    * since all the calculations will be done by going through the raw data
5559    * */
5560   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5561   PetscCall(MatSetUp(*P_oth));
5562   PetscCall(PetscFree(pnnz));
5563   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5564   /* diagonal */
5565   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5566   /* off-diagonal */
5567   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5568   /* diagonal */
5569   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5570   /* off-diagonal */
5571   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5572   dntotalcols = 0;
5573   ontotalcols = 0;
5574   ntotalcols  = 0;
5575   for (i = 0; i < nrows; i++) {
5576     owner = 0;
5577     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5578     /* Set iremote for diag matrix */
5579     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5580       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5581       iremote[dntotalcols].rank  = owner;
5582       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5583       ilocal[dntotalcols++] = ntotalcols++;
5584     }
5585     /* off-diagonal */
5586     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5587       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5588       oiremote[ontotalcols].rank  = owner;
5589       oilocal[ontotalcols++]      = ntotalcols++;
5590     }
5591   }
5592   PetscCall(ISRestoreIndices(rows, &lrowindices));
5593   PetscCall(PetscFree(loffsets));
5594   PetscCall(PetscFree(nlcols));
5595   PetscCall(PetscSFCreate(comm, &sf));
5596   /* P serves as roots and P_oth is leaves
5597    * Diag matrix
5598    * */
5599   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5600   PetscCall(PetscSFSetFromOptions(sf));
5601   PetscCall(PetscSFSetUp(sf));
5602 
5603   PetscCall(PetscSFCreate(comm, &osf));
5604   /* off-diagonal */
5605   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5606   PetscCall(PetscSFSetFromOptions(osf));
5607   PetscCall(PetscSFSetUp(osf));
5608   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5609   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5610   /* operate on the matrix internal data to save memory */
5611   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5612   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5613   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5614   /* Convert to global indices for diag matrix */
5615   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5616   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5617   /* We want P_oth store global indices */
5618   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5619   /* Use memory scalable approach */
5620   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5621   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5622   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5623   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5624   /* Convert back to local indices */
5625   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5626   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5627   nout = 0;
5628   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5629   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5630   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5631   /* Exchange values */
5632   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5633   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5634   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5635   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5636   /* Stop PETSc from shrinking memory */
5637   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5638   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5639   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5640   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5641   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5642   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5643   PetscCall(PetscSFDestroy(&sf));
5644   PetscCall(PetscSFDestroy(&osf));
5645   PetscFunctionReturn(PETSC_SUCCESS);
5646 }
5647 
5648 /*
5649  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5650  * This supports MPIAIJ and MAIJ
5651  * */
5652 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5653 {
5654   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5655   Mat_SeqAIJ *p_oth;
5656   IS          rows, map;
5657   PetscHMapI  hamp;
5658   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5659   MPI_Comm    comm;
5660   PetscSF     sf, osf;
5661   PetscBool   has;
5662 
5663   PetscFunctionBegin;
5664   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5665   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5666   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5667    *  and then create a submatrix (that often is an overlapping matrix)
5668    * */
5669   if (reuse == MAT_INITIAL_MATRIX) {
5670     /* Use a hash table to figure out unique keys */
5671     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5672     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5673     count = 0;
5674     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5675     for (i = 0; i < a->B->cmap->n; i++) {
5676       key = a->garray[i] / dof;
5677       PetscCall(PetscHMapIHas(hamp, key, &has));
5678       if (!has) {
5679         mapping[i] = count;
5680         PetscCall(PetscHMapISet(hamp, key, count++));
5681       } else {
5682         /* Current 'i' has the same value the previous step */
5683         mapping[i] = count - 1;
5684       }
5685     }
5686     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5687     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5688     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5689     PetscCall(PetscCalloc1(htsize, &rowindices));
5690     off = 0;
5691     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5692     PetscCall(PetscHMapIDestroy(&hamp));
5693     PetscCall(PetscSortInt(htsize, rowindices));
5694     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5695     /* In case, the matrix was already created but users want to recreate the matrix */
5696     PetscCall(MatDestroy(P_oth));
5697     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5698     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5699     PetscCall(ISDestroy(&map));
5700     PetscCall(ISDestroy(&rows));
5701   } else if (reuse == MAT_REUSE_MATRIX) {
5702     /* If matrix was already created, we simply update values using SF objects
5703      * that as attached to the matrix earlier.
5704      */
5705     const PetscScalar *pd_a, *po_a;
5706 
5707     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5708     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5709     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5710     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5711     /* Update values in place */
5712     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5713     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5714     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5715     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5716     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5717     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5718     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5719     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5720   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5721   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5722   PetscFunctionReturn(PETSC_SUCCESS);
5723 }
5724 
5725 /*@C
5726   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5727 
5728   Collective
5729 
5730   Input Parameters:
5731 + A     - the first matrix in `MATMPIAIJ` format
5732 . B     - the second matrix in `MATMPIAIJ` format
5733 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5734 
5735   Output Parameters:
5736 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5737 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5738 - B_seq - the sequential matrix generated
5739 
5740   Level: developer
5741 
5742 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5743 @*/
5744 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5745 {
5746   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5747   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5748   IS          isrowb, iscolb;
5749   Mat        *bseq = NULL;
5750 
5751   PetscFunctionBegin;
5752   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5753              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5754   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5755 
5756   if (scall == MAT_INITIAL_MATRIX) {
5757     start = A->cmap->rstart;
5758     cmap  = a->garray;
5759     nzA   = a->A->cmap->n;
5760     nzB   = a->B->cmap->n;
5761     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5762     ncols = 0;
5763     for (i = 0; i < nzB; i++) { /* row < local row index */
5764       if (cmap[i] < start) idx[ncols++] = cmap[i];
5765       else break;
5766     }
5767     imark = i;
5768     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5769     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5770     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5771     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5772   } else {
5773     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5774     isrowb = *rowb;
5775     iscolb = *colb;
5776     PetscCall(PetscMalloc1(1, &bseq));
5777     bseq[0] = *B_seq;
5778   }
5779   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5780   *B_seq = bseq[0];
5781   PetscCall(PetscFree(bseq));
5782   if (!rowb) {
5783     PetscCall(ISDestroy(&isrowb));
5784   } else {
5785     *rowb = isrowb;
5786   }
5787   if (!colb) {
5788     PetscCall(ISDestroy(&iscolb));
5789   } else {
5790     *colb = iscolb;
5791   }
5792   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5793   PetscFunctionReturn(PETSC_SUCCESS);
5794 }
5795 
5796 /*
5797     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5798     of the OFF-DIAGONAL portion of local A
5799 
5800     Collective
5801 
5802    Input Parameters:
5803 +    A,B - the matrices in `MATMPIAIJ` format
5804 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5805 
5806    Output Parameter:
5807 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5808 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5809 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5810 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5811 
5812     Developer Note:
5813     This directly accesses information inside the VecScatter associated with the matrix-vector product
5814      for this matrix. This is not desirable..
5815 
5816     Level: developer
5817 
5818 */
5819 
5820 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5821 {
5822   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5823   VecScatter         ctx;
5824   MPI_Comm           comm;
5825   const PetscMPIInt *rprocs, *sprocs;
5826   PetscMPIInt        nrecvs, nsends;
5827   const PetscInt    *srow, *rstarts, *sstarts;
5828   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5829   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5830   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5831   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5832   PetscMPIInt        size, tag, rank, nreqs;
5833 
5834   PetscFunctionBegin;
5835   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5836   PetscCallMPI(MPI_Comm_size(comm, &size));
5837 
5838   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5839              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5840   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5841   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5842 
5843   if (size == 1) {
5844     startsj_s = NULL;
5845     bufa_ptr  = NULL;
5846     *B_oth    = NULL;
5847     PetscFunctionReturn(PETSC_SUCCESS);
5848   }
5849 
5850   ctx = a->Mvctx;
5851   tag = ((PetscObject)ctx)->tag;
5852 
5853   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5854   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5855   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5856   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5857   PetscCall(PetscMalloc1(nreqs, &reqs));
5858   rwaits = reqs;
5859   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5860 
5861   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5862   if (scall == MAT_INITIAL_MATRIX) {
5863     /* i-array */
5864     /*  post receives */
5865     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5866     for (i = 0; i < nrecvs; i++) {
5867       rowlen = rvalues + rstarts[i] * rbs;
5868       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5869       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5870     }
5871 
5872     /* pack the outgoing message */
5873     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5874 
5875     sstartsj[0] = 0;
5876     rstartsj[0] = 0;
5877     len         = 0; /* total length of j or a array to be sent */
5878     if (nsends) {
5879       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5880       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5881     }
5882     for (i = 0; i < nsends; i++) {
5883       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5884       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5885       for (j = 0; j < nrows; j++) {
5886         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5887         for (l = 0; l < sbs; l++) {
5888           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5889 
5890           rowlen[j * sbs + l] = ncols;
5891 
5892           len += ncols;
5893           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5894         }
5895         k++;
5896       }
5897       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5898 
5899       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5900     }
5901     /* recvs and sends of i-array are completed */
5902     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5903     PetscCall(PetscFree(svalues));
5904 
5905     /* allocate buffers for sending j and a arrays */
5906     PetscCall(PetscMalloc1(len + 1, &bufj));
5907     PetscCall(PetscMalloc1(len + 1, &bufa));
5908 
5909     /* create i-array of B_oth */
5910     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5911 
5912     b_othi[0] = 0;
5913     len       = 0; /* total length of j or a array to be received */
5914     k         = 0;
5915     for (i = 0; i < nrecvs; i++) {
5916       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5917       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5918       for (j = 0; j < nrows; j++) {
5919         b_othi[k + 1] = b_othi[k] + rowlen[j];
5920         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5921         k++;
5922       }
5923       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5924     }
5925     PetscCall(PetscFree(rvalues));
5926 
5927     /* allocate space for j and a arrays of B_oth */
5928     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5929     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5930 
5931     /* j-array */
5932     /*  post receives of j-array */
5933     for (i = 0; i < nrecvs; i++) {
5934       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5935       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5936     }
5937 
5938     /* pack the outgoing message j-array */
5939     if (nsends) k = sstarts[0];
5940     for (i = 0; i < nsends; i++) {
5941       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5942       bufJ  = bufj + sstartsj[i];
5943       for (j = 0; j < nrows; j++) {
5944         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5945         for (ll = 0; ll < sbs; ll++) {
5946           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5947           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5948           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5949         }
5950       }
5951       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5952     }
5953 
5954     /* recvs and sends of j-array are completed */
5955     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5956   } else if (scall == MAT_REUSE_MATRIX) {
5957     sstartsj = *startsj_s;
5958     rstartsj = *startsj_r;
5959     bufa     = *bufa_ptr;
5960     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5961   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5962 
5963   /* a-array */
5964   /*  post receives of a-array */
5965   for (i = 0; i < nrecvs; i++) {
5966     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5967     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5968   }
5969 
5970   /* pack the outgoing message a-array */
5971   if (nsends) k = sstarts[0];
5972   for (i = 0; i < nsends; i++) {
5973     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5974     bufA  = bufa + sstartsj[i];
5975     for (j = 0; j < nrows; j++) {
5976       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5977       for (ll = 0; ll < sbs; ll++) {
5978         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5979         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5980         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5981       }
5982     }
5983     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5984   }
5985   /* recvs and sends of a-array are completed */
5986   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5987   PetscCall(PetscFree(reqs));
5988 
5989   if (scall == MAT_INITIAL_MATRIX) {
5990     Mat_SeqAIJ *b_oth;
5991 
5992     /* put together the new matrix */
5993     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5994 
5995     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5996     /* Since these are PETSc arrays, change flags to free them as necessary. */
5997     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5998     b_oth->free_a  = PETSC_TRUE;
5999     b_oth->free_ij = PETSC_TRUE;
6000     b_oth->nonew   = 0;
6001 
6002     PetscCall(PetscFree(bufj));
6003     if (!startsj_s || !bufa_ptr) {
6004       PetscCall(PetscFree2(sstartsj, rstartsj));
6005       PetscCall(PetscFree(bufa_ptr));
6006     } else {
6007       *startsj_s = sstartsj;
6008       *startsj_r = rstartsj;
6009       *bufa_ptr  = bufa;
6010     }
6011   } else if (scall == MAT_REUSE_MATRIX) {
6012     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6013   }
6014 
6015   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6016   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6017   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6018   PetscFunctionReturn(PETSC_SUCCESS);
6019 }
6020 
6021 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6022 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6023 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6024 #if defined(PETSC_HAVE_MKL_SPARSE)
6025 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6026 #endif
6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6029 #if defined(PETSC_HAVE_ELEMENTAL)
6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6031 #endif
6032 #if defined(PETSC_HAVE_SCALAPACK)
6033 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6034 #endif
6035 #if defined(PETSC_HAVE_HYPRE)
6036 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6037 #endif
6038 #if defined(PETSC_HAVE_CUDA)
6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6040 #endif
6041 #if defined(PETSC_HAVE_HIP)
6042 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6043 #endif
6044 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6046 #endif
6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6048 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6049 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6050 
6051 /*
6052     Computes (B'*A')' since computing B*A directly is untenable
6053 
6054                n                       p                          p
6055         [             ]       [             ]         [                 ]
6056       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6057         [             ]       [             ]         [                 ]
6058 
6059 */
6060 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6061 {
6062   Mat At, Bt, Ct;
6063 
6064   PetscFunctionBegin;
6065   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6066   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6067   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6068   PetscCall(MatDestroy(&At));
6069   PetscCall(MatDestroy(&Bt));
6070   PetscCall(MatTransposeSetPrecursor(Ct, C));
6071   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6072   PetscCall(MatDestroy(&Ct));
6073   PetscFunctionReturn(PETSC_SUCCESS);
6074 }
6075 
6076 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6077 {
6078   PetscBool cisdense;
6079 
6080   PetscFunctionBegin;
6081   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6082   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6083   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6084   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6085   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6086   PetscCall(MatSetUp(C));
6087 
6088   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6089   PetscFunctionReturn(PETSC_SUCCESS);
6090 }
6091 
6092 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6093 {
6094   Mat_Product *product = C->product;
6095   Mat          A = product->A, B = product->B;
6096 
6097   PetscFunctionBegin;
6098   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6099              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6100   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6101   C->ops->productsymbolic = MatProductSymbolic_AB;
6102   PetscFunctionReturn(PETSC_SUCCESS);
6103 }
6104 
6105 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6106 {
6107   Mat_Product *product = C->product;
6108 
6109   PetscFunctionBegin;
6110   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6111   PetscFunctionReturn(PETSC_SUCCESS);
6112 }
6113 
6114 /*
6115    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6116 
6117   Input Parameters:
6118 
6119     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6120     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6121 
6122     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6123 
6124     For Set1, j1[] contains column indices of the nonzeros.
6125     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6126     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6127     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6128 
6129     Similar for Set2.
6130 
6131     This routine merges the two sets of nonzeros row by row and removes repeats.
6132 
6133   Output Parameters: (memory is allocated by the caller)
6134 
6135     i[],j[]: the CSR of the merged matrix, which has m rows.
6136     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6137     imap2[]: similar to imap1[], but for Set2.
6138     Note we order nonzeros row-by-row and from left to right.
6139 */
6140 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6141 {
6142   PetscInt   r, m; /* Row index of mat */
6143   PetscCount t, t1, t2, b1, e1, b2, e2;
6144 
6145   PetscFunctionBegin;
6146   PetscCall(MatGetLocalSize(mat, &m, NULL));
6147   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6148   i[0]        = 0;
6149   for (r = 0; r < m; r++) { /* Do row by row merging */
6150     b1 = rowBegin1[r];
6151     e1 = rowEnd1[r];
6152     b2 = rowBegin2[r];
6153     e2 = rowEnd2[r];
6154     while (b1 < e1 && b2 < e2) {
6155       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6156         j[t]      = j1[b1];
6157         imap1[t1] = t;
6158         imap2[t2] = t;
6159         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6160         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6161         t1++;
6162         t2++;
6163         t++;
6164       } else if (j1[b1] < j2[b2]) {
6165         j[t]      = j1[b1];
6166         imap1[t1] = t;
6167         b1 += jmap1[t1 + 1] - jmap1[t1];
6168         t1++;
6169         t++;
6170       } else {
6171         j[t]      = j2[b2];
6172         imap2[t2] = t;
6173         b2 += jmap2[t2 + 1] - jmap2[t2];
6174         t2++;
6175         t++;
6176       }
6177     }
6178     /* Merge the remaining in either j1[] or j2[] */
6179     while (b1 < e1) {
6180       j[t]      = j1[b1];
6181       imap1[t1] = t;
6182       b1 += jmap1[t1 + 1] - jmap1[t1];
6183       t1++;
6184       t++;
6185     }
6186     while (b2 < e2) {
6187       j[t]      = j2[b2];
6188       imap2[t2] = t;
6189       b2 += jmap2[t2 + 1] - jmap2[t2];
6190       t2++;
6191       t++;
6192     }
6193     PetscCall(PetscIntCast(t, i + r + 1));
6194   }
6195   PetscFunctionReturn(PETSC_SUCCESS);
6196 }
6197 
6198 /*
6199   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6200 
6201   Input Parameters:
6202     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6203     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6204       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6205 
6206       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6207       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6208 
6209   Output Parameters:
6210     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6211     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6212       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6213       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6214 
6215     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6216       Atot: number of entries belonging to the diagonal block.
6217       Annz: number of unique nonzeros belonging to the diagonal block.
6218       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6219         repeats (i.e., same 'i,j' pair).
6220       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6221         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6222 
6223       Atot: number of entries belonging to the diagonal block
6224       Annz: number of unique nonzeros belonging to the diagonal block.
6225 
6226     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6227 
6228     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6229 */
6230 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6231 {
6232   PetscInt    cstart, cend, rstart, rend, row, col;
6233   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6234   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6235   PetscCount  k, m, p, q, r, s, mid;
6236   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6237 
6238   PetscFunctionBegin;
6239   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6240   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6241   m = rend - rstart;
6242 
6243   /* Skip negative rows */
6244   for (k = 0; k < n; k++)
6245     if (i[k] >= 0) break;
6246 
6247   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6248      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6249   */
6250   while (k < n) {
6251     row = i[k];
6252     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6253     for (s = k; s < n; s++)
6254       if (i[s] != row) break;
6255 
6256     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6257     for (p = k; p < s; p++) {
6258       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6259       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6260     }
6261     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6262     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6263     rowBegin[row - rstart] = k;
6264     rowMid[row - rstart]   = mid;
6265     rowEnd[row - rstart]   = s;
6266 
6267     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6268     Atot += mid - k;
6269     Btot += s - mid;
6270 
6271     /* Count unique nonzeros of this diag row */
6272     for (p = k; p < mid;) {
6273       col = j[p];
6274       do {
6275         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6276         p++;
6277       } while (p < mid && j[p] == col);
6278       Annz++;
6279     }
6280 
6281     /* Count unique nonzeros of this offdiag row */
6282     for (p = mid; p < s;) {
6283       col = j[p];
6284       do {
6285         p++;
6286       } while (p < s && j[p] == col);
6287       Bnnz++;
6288     }
6289     k = s;
6290   }
6291 
6292   /* Allocation according to Atot, Btot, Annz, Bnnz */
6293   PetscCall(PetscMalloc1(Atot, &Aperm));
6294   PetscCall(PetscMalloc1(Btot, &Bperm));
6295   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6296   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6297 
6298   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6299   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6300   for (r = 0; r < m; r++) {
6301     k   = rowBegin[r];
6302     mid = rowMid[r];
6303     s   = rowEnd[r];
6304     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6305     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6306     Atot += mid - k;
6307     Btot += s - mid;
6308 
6309     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6310     for (p = k; p < mid;) {
6311       col = j[p];
6312       q   = p;
6313       do {
6314         p++;
6315       } while (p < mid && j[p] == col);
6316       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6317       Annz++;
6318     }
6319 
6320     for (p = mid; p < s;) {
6321       col = j[p];
6322       q   = p;
6323       do {
6324         p++;
6325       } while (p < s && j[p] == col);
6326       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6327       Bnnz++;
6328     }
6329   }
6330   /* Output */
6331   *Aperm_ = Aperm;
6332   *Annz_  = Annz;
6333   *Atot_  = Atot;
6334   *Ajmap_ = Ajmap;
6335   *Bperm_ = Bperm;
6336   *Bnnz_  = Bnnz;
6337   *Btot_  = Btot;
6338   *Bjmap_ = Bjmap;
6339   PetscFunctionReturn(PETSC_SUCCESS);
6340 }
6341 
6342 /*
6343   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6344 
6345   Input Parameters:
6346     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6347     nnz:  number of unique nonzeros in the merged matrix
6348     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6349     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6350 
6351   Output Parameter: (memory is allocated by the caller)
6352     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6353 
6354   Example:
6355     nnz1 = 4
6356     nnz  = 6
6357     imap = [1,3,4,5]
6358     jmap = [0,3,5,6,7]
6359    then,
6360     jmap_new = [0,0,3,3,5,6,7]
6361 */
6362 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6363 {
6364   PetscCount k, p;
6365 
6366   PetscFunctionBegin;
6367   jmap_new[0] = 0;
6368   p           = nnz;                /* p loops over jmap_new[] backwards */
6369   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6370     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6371   }
6372   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6373   PetscFunctionReturn(PETSC_SUCCESS);
6374 }
6375 
6376 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6377 {
6378   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6379 
6380   PetscFunctionBegin;
6381   PetscCall(PetscSFDestroy(&coo->sf));
6382   PetscCall(PetscFree(coo->Aperm1));
6383   PetscCall(PetscFree(coo->Bperm1));
6384   PetscCall(PetscFree(coo->Ajmap1));
6385   PetscCall(PetscFree(coo->Bjmap1));
6386   PetscCall(PetscFree(coo->Aimap2));
6387   PetscCall(PetscFree(coo->Bimap2));
6388   PetscCall(PetscFree(coo->Aperm2));
6389   PetscCall(PetscFree(coo->Bperm2));
6390   PetscCall(PetscFree(coo->Ajmap2));
6391   PetscCall(PetscFree(coo->Bjmap2));
6392   PetscCall(PetscFree(coo->Cperm1));
6393   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6394   PetscCall(PetscFree(coo));
6395   PetscFunctionReturn(PETSC_SUCCESS);
6396 }
6397 
6398 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6399 {
6400   MPI_Comm             comm;
6401   PetscMPIInt          rank, size;
6402   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6403   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6404   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6405   PetscContainer       container;
6406   MatCOOStruct_MPIAIJ *coo;
6407 
6408   PetscFunctionBegin;
6409   PetscCall(PetscFree(mpiaij->garray));
6410   PetscCall(VecDestroy(&mpiaij->lvec));
6411 #if defined(PETSC_USE_CTABLE)
6412   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6413 #else
6414   PetscCall(PetscFree(mpiaij->colmap));
6415 #endif
6416   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6417   mat->assembled     = PETSC_FALSE;
6418   mat->was_assembled = PETSC_FALSE;
6419 
6420   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6421   PetscCallMPI(MPI_Comm_size(comm, &size));
6422   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6423   PetscCall(PetscLayoutSetUp(mat->rmap));
6424   PetscCall(PetscLayoutSetUp(mat->cmap));
6425   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6426   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6427   PetscCall(MatGetLocalSize(mat, &m, &n));
6428   PetscCall(MatGetSize(mat, &M, &N));
6429 
6430   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6431   /* entries come first, then local rows, then remote rows.                     */
6432   PetscCount n1 = coo_n, *perm1;
6433   PetscInt  *i1 = coo_i, *j1 = coo_j;
6434 
6435   PetscCall(PetscMalloc1(n1, &perm1));
6436   for (k = 0; k < n1; k++) perm1[k] = k;
6437 
6438   /* Manipulate indices so that entries with negative row or col indices will have smallest
6439      row indices, local entries will have greater but negative row indices, and remote entries
6440      will have positive row indices.
6441   */
6442   for (k = 0; k < n1; k++) {
6443     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6444     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6445     else {
6446       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6447       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6448     }
6449   }
6450 
6451   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6452   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6453 
6454   /* Advance k to the first entry we need to take care of */
6455   for (k = 0; k < n1; k++)
6456     if (i1[k] > PETSC_INT_MIN) break;
6457   PetscCount i1start = k;
6458 
6459   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6460   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6461 
6462   /*           Send remote rows to their owner                                  */
6463   /* Find which rows should be sent to which remote ranks*/
6464   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6465   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6466   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6467   const PetscInt *ranges;
6468   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6469 
6470   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6471   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6472   for (k = rem; k < n1;) {
6473     PetscMPIInt owner;
6474     PetscInt    firstRow, lastRow;
6475 
6476     /* Locate a row range */
6477     firstRow = i1[k]; /* first row of this owner */
6478     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6479     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6480 
6481     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6482     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6483 
6484     /* All entries in [k,p) belong to this remote owner */
6485     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6486       PetscMPIInt *sendto2;
6487       PetscInt    *nentries2;
6488       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6489 
6490       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6491       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6492       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6493       PetscCall(PetscFree2(sendto, nentries2));
6494       sendto   = sendto2;
6495       nentries = nentries2;
6496       maxNsend = maxNsend2;
6497     }
6498     sendto[nsend] = owner;
6499     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6500     nsend++;
6501     k = p;
6502   }
6503 
6504   /* Build 1st SF to know offsets on remote to send data */
6505   PetscSF      sf1;
6506   PetscInt     nroots = 1, nroots2 = 0;
6507   PetscInt     nleaves = nsend, nleaves2 = 0;
6508   PetscInt    *offsets;
6509   PetscSFNode *iremote;
6510 
6511   PetscCall(PetscSFCreate(comm, &sf1));
6512   PetscCall(PetscMalloc1(nsend, &iremote));
6513   PetscCall(PetscMalloc1(nsend, &offsets));
6514   for (k = 0; k < nsend; k++) {
6515     iremote[k].rank  = sendto[k];
6516     iremote[k].index = 0;
6517     nleaves2 += nentries[k];
6518     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6519   }
6520   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6521   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6522   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6523   PetscCall(PetscSFDestroy(&sf1));
6524   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6525 
6526   /* Build 2nd SF to send remote COOs to their owner */
6527   PetscSF sf2;
6528   nroots  = nroots2;
6529   nleaves = nleaves2;
6530   PetscCall(PetscSFCreate(comm, &sf2));
6531   PetscCall(PetscSFSetFromOptions(sf2));
6532   PetscCall(PetscMalloc1(nleaves, &iremote));
6533   p = 0;
6534   for (k = 0; k < nsend; k++) {
6535     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6536     for (q = 0; q < nentries[k]; q++, p++) {
6537       iremote[p].rank = sendto[k];
6538       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6539     }
6540   }
6541   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6542 
6543   /* Send the remote COOs to their owner */
6544   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6545   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6546   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6547   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6548   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6549   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6550   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6551   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6552   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6553   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6554   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6555 
6556   PetscCall(PetscFree(offsets));
6557   PetscCall(PetscFree2(sendto, nentries));
6558 
6559   /* Sort received COOs by row along with the permutation array     */
6560   for (k = 0; k < n2; k++) perm2[k] = k;
6561   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6562 
6563   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6564   PetscCount *Cperm1;
6565   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6566   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6567   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6568   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6569 
6570   /* Support for HYPRE matrices, kind of a hack.
6571      Swap min column with diagonal so that diagonal values will go first */
6572   PetscBool hypre;
6573   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6574   if (hypre) {
6575     PetscInt *minj;
6576     PetscBT   hasdiag;
6577 
6578     PetscCall(PetscBTCreate(m, &hasdiag));
6579     PetscCall(PetscMalloc1(m, &minj));
6580     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6581     for (k = i1start; k < rem; k++) {
6582       if (j1[k] < cstart || j1[k] >= cend) continue;
6583       const PetscInt rindex = i1[k] - rstart;
6584       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6585       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6586     }
6587     for (k = 0; k < n2; k++) {
6588       if (j2[k] < cstart || j2[k] >= cend) continue;
6589       const PetscInt rindex = i2[k] - rstart;
6590       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6591       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6592     }
6593     for (k = i1start; k < rem; k++) {
6594       const PetscInt rindex = i1[k] - rstart;
6595       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6596       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6597       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6598     }
6599     for (k = 0; k < n2; k++) {
6600       const PetscInt rindex = i2[k] - rstart;
6601       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6602       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6603       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6604     }
6605     PetscCall(PetscBTDestroy(&hasdiag));
6606     PetscCall(PetscFree(minj));
6607   }
6608 
6609   /* Split local COOs and received COOs into diag/offdiag portions */
6610   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6611   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6612   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6613   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6614   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6615   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6616 
6617   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6618   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6619   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6620   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6621 
6622   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6623   PetscInt *Ai, *Bi;
6624   PetscInt *Aj, *Bj;
6625 
6626   PetscCall(PetscMalloc1(m + 1, &Ai));
6627   PetscCall(PetscMalloc1(m + 1, &Bi));
6628   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6629   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6630 
6631   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6632   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6633   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6634   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6635   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6636 
6637   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6638   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6639 
6640   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6641   /* expect nonzeros in A/B most likely have local contributing entries        */
6642   PetscInt    Annz = Ai[m];
6643   PetscInt    Bnnz = Bi[m];
6644   PetscCount *Ajmap1_new, *Bjmap1_new;
6645 
6646   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6647   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6648 
6649   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6650   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6651 
6652   PetscCall(PetscFree(Aimap1));
6653   PetscCall(PetscFree(Ajmap1));
6654   PetscCall(PetscFree(Bimap1));
6655   PetscCall(PetscFree(Bjmap1));
6656   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6657   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6658   PetscCall(PetscFree(perm1));
6659   PetscCall(PetscFree3(i2, j2, perm2));
6660 
6661   Ajmap1 = Ajmap1_new;
6662   Bjmap1 = Bjmap1_new;
6663 
6664   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6665   if (Annz < Annz1 + Annz2) {
6666     PetscInt *Aj_new;
6667     PetscCall(PetscMalloc1(Annz, &Aj_new));
6668     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6669     PetscCall(PetscFree(Aj));
6670     Aj = Aj_new;
6671   }
6672 
6673   if (Bnnz < Bnnz1 + Bnnz2) {
6674     PetscInt *Bj_new;
6675     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6676     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6677     PetscCall(PetscFree(Bj));
6678     Bj = Bj_new;
6679   }
6680 
6681   /* Create new submatrices for on-process and off-process coupling                  */
6682   PetscScalar     *Aa, *Ba;
6683   MatType          rtype;
6684   Mat_SeqAIJ      *a, *b;
6685   PetscObjectState state;
6686   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6687   PetscCall(PetscCalloc1(Bnnz, &Ba));
6688   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6689   if (cstart) {
6690     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6691   }
6692 
6693   PetscCall(MatGetRootType_Private(mat, &rtype));
6694 
6695   MatSeqXAIJGetOptions_Private(mpiaij->A);
6696   PetscCall(MatDestroy(&mpiaij->A));
6697   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6698   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6699   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6700 
6701   MatSeqXAIJGetOptions_Private(mpiaij->B);
6702   PetscCall(MatDestroy(&mpiaij->B));
6703   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6704   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6705   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6706 
6707   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6708   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6709   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6710   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6711 
6712   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6713   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6714   a->free_a  = PETSC_TRUE;
6715   a->free_ij = PETSC_TRUE;
6716   b->free_a  = PETSC_TRUE;
6717   b->free_ij = PETSC_TRUE;
6718   a->maxnz   = a->nz;
6719   b->maxnz   = b->nz;
6720 
6721   /* conversion must happen AFTER multiply setup */
6722   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6723   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6724   PetscCall(VecDestroy(&mpiaij->lvec));
6725   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6726 
6727   // Put the COO struct in a container and then attach that to the matrix
6728   PetscCall(PetscMalloc1(1, &coo));
6729   coo->n       = coo_n;
6730   coo->sf      = sf2;
6731   coo->sendlen = nleaves;
6732   coo->recvlen = nroots;
6733   coo->Annz    = Annz;
6734   coo->Bnnz    = Bnnz;
6735   coo->Annz2   = Annz2;
6736   coo->Bnnz2   = Bnnz2;
6737   coo->Atot1   = Atot1;
6738   coo->Atot2   = Atot2;
6739   coo->Btot1   = Btot1;
6740   coo->Btot2   = Btot2;
6741   coo->Ajmap1  = Ajmap1;
6742   coo->Aperm1  = Aperm1;
6743   coo->Bjmap1  = Bjmap1;
6744   coo->Bperm1  = Bperm1;
6745   coo->Aimap2  = Aimap2;
6746   coo->Ajmap2  = Ajmap2;
6747   coo->Aperm2  = Aperm2;
6748   coo->Bimap2  = Bimap2;
6749   coo->Bjmap2  = Bjmap2;
6750   coo->Bperm2  = Bperm2;
6751   coo->Cperm1  = Cperm1;
6752   // Allocate in preallocation. If not used, it has zero cost on host
6753   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6754   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6755   PetscCall(PetscContainerSetPointer(container, coo));
6756   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6757   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6758   PetscCall(PetscContainerDestroy(&container));
6759   PetscFunctionReturn(PETSC_SUCCESS);
6760 }
6761 
6762 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6763 {
6764   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6765   Mat                  A = mpiaij->A, B = mpiaij->B;
6766   PetscScalar         *Aa, *Ba;
6767   PetscScalar         *sendbuf, *recvbuf;
6768   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6769   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6770   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6771   const PetscCount    *Cperm1;
6772   PetscContainer       container;
6773   MatCOOStruct_MPIAIJ *coo;
6774 
6775   PetscFunctionBegin;
6776   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6777   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6778   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6779   sendbuf = coo->sendbuf;
6780   recvbuf = coo->recvbuf;
6781   Ajmap1  = coo->Ajmap1;
6782   Ajmap2  = coo->Ajmap2;
6783   Aimap2  = coo->Aimap2;
6784   Bjmap1  = coo->Bjmap1;
6785   Bjmap2  = coo->Bjmap2;
6786   Bimap2  = coo->Bimap2;
6787   Aperm1  = coo->Aperm1;
6788   Aperm2  = coo->Aperm2;
6789   Bperm1  = coo->Bperm1;
6790   Bperm2  = coo->Bperm2;
6791   Cperm1  = coo->Cperm1;
6792 
6793   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6794   PetscCall(MatSeqAIJGetArray(B, &Ba));
6795 
6796   /* Pack entries to be sent to remote */
6797   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6798 
6799   /* Send remote entries to their owner and overlap the communication with local computation */
6800   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6801   /* Add local entries to A and B */
6802   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6803     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6804     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6805     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6806   }
6807   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6808     PetscScalar sum = 0.0;
6809     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6810     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6811   }
6812   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6813 
6814   /* Add received remote entries to A and B */
6815   for (PetscCount i = 0; i < coo->Annz2; i++) {
6816     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6817   }
6818   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6819     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6820   }
6821   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6822   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6823   PetscFunctionReturn(PETSC_SUCCESS);
6824 }
6825 
6826 /*MC
6827    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6828 
6829    Options Database Keys:
6830 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6831 
6832    Level: beginner
6833 
6834    Notes:
6835    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6836     in this case the values associated with the rows and columns one passes in are set to zero
6837     in the matrix
6838 
6839     `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
6840     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6841 
6842 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6843 M*/
6844 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6845 {
6846   Mat_MPIAIJ *b;
6847   PetscMPIInt size;
6848 
6849   PetscFunctionBegin;
6850   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6851 
6852   PetscCall(PetscNew(&b));
6853   B->data       = (void *)b;
6854   B->ops[0]     = MatOps_Values;
6855   B->assembled  = PETSC_FALSE;
6856   B->insertmode = NOT_SET_VALUES;
6857   b->size       = size;
6858 
6859   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6860 
6861   /* build cache for off array entries formed */
6862   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6863 
6864   b->donotstash  = PETSC_FALSE;
6865   b->colmap      = NULL;
6866   b->garray      = NULL;
6867   b->roworiented = PETSC_TRUE;
6868 
6869   /* stuff used for matrix vector multiply */
6870   b->lvec  = NULL;
6871   b->Mvctx = NULL;
6872 
6873   /* stuff for MatGetRow() */
6874   b->rowindices   = NULL;
6875   b->rowvalues    = NULL;
6876   b->getrowactive = PETSC_FALSE;
6877 
6878   /* flexible pointer used in CUSPARSE classes */
6879   b->spptr = NULL;
6880 
6881   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6882   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6883   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6884   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6885   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6886   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6887   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
6888   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6889   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6890   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6891   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6892 #if defined(PETSC_HAVE_CUDA)
6893   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6894 #endif
6895 #if defined(PETSC_HAVE_HIP)
6896   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6897 #endif
6898 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6900 #endif
6901 #if defined(PETSC_HAVE_MKL_SPARSE)
6902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6903 #endif
6904   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6906   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6908 #if defined(PETSC_HAVE_ELEMENTAL)
6909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6910 #endif
6911 #if defined(PETSC_HAVE_SCALAPACK)
6912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6913 #endif
6914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6915   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6916 #if defined(PETSC_HAVE_HYPRE)
6917   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6919 #endif
6920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6924   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6925   PetscFunctionReturn(PETSC_SUCCESS);
6926 }
6927 
6928 /*@
6929   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6930   and "off-diagonal" part of the matrix in CSR format.
6931 
6932   Collective
6933 
6934   Input Parameters:
6935 + comm - MPI communicator
6936 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6937 . n    - This value should be the same as the local size used in creating the
6938          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6939          calculated if `N` is given) For square matrices `n` is almost always `m`.
6940 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6941 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6942 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6943 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6944 . a    - matrix values
6945 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6946 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6947 - oa   - matrix values
6948 
6949   Output Parameter:
6950 . mat - the matrix
6951 
6952   Level: advanced
6953 
6954   Notes:
6955   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6956   must free the arrays once the matrix has been destroyed and not before.
6957 
6958   The `i` and `j` indices are 0 based
6959 
6960   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6961 
6962   This sets local rows and cannot be used to set off-processor values.
6963 
6964   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6965   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6966   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6967   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6968   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6969   communication if it is known that only local entries will be set.
6970 
6971 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6972           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6973 @*/
6974 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6975 {
6976   Mat_MPIAIJ *maij;
6977 
6978   PetscFunctionBegin;
6979   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6980   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6981   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6982   PetscCall(MatCreate(comm, mat));
6983   PetscCall(MatSetSizes(*mat, m, n, M, N));
6984   PetscCall(MatSetType(*mat, MATMPIAIJ));
6985   maij = (Mat_MPIAIJ *)(*mat)->data;
6986 
6987   (*mat)->preallocated = PETSC_TRUE;
6988 
6989   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6990   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6991 
6992   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6993   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6994 
6995   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6996   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6997   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6998   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6999   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7000   PetscFunctionReturn(PETSC_SUCCESS);
7001 }
7002 
7003 typedef struct {
7004   Mat       *mp;    /* intermediate products */
7005   PetscBool *mptmp; /* is the intermediate product temporary ? */
7006   PetscInt   cp;    /* number of intermediate products */
7007 
7008   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7009   PetscInt    *startsj_s, *startsj_r;
7010   PetscScalar *bufa;
7011   Mat          P_oth;
7012 
7013   /* may take advantage of merging product->B */
7014   Mat Bloc; /* B-local by merging diag and off-diag */
7015 
7016   /* cusparse does not have support to split between symbolic and numeric phases.
7017      When api_user is true, we don't need to update the numerical values
7018      of the temporary storage */
7019   PetscBool reusesym;
7020 
7021   /* support for COO values insertion */
7022   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7023   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7024   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7025   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7026   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7027   PetscMemType mtype;
7028 
7029   /* customization */
7030   PetscBool abmerge;
7031   PetscBool P_oth_bind;
7032 } MatMatMPIAIJBACKEND;
7033 
7034 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7035 {
7036   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7037   PetscInt             i;
7038 
7039   PetscFunctionBegin;
7040   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7041   PetscCall(PetscFree(mmdata->bufa));
7042   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7043   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7044   PetscCall(MatDestroy(&mmdata->P_oth));
7045   PetscCall(MatDestroy(&mmdata->Bloc));
7046   PetscCall(PetscSFDestroy(&mmdata->sf));
7047   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7048   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7049   PetscCall(PetscFree(mmdata->own[0]));
7050   PetscCall(PetscFree(mmdata->own));
7051   PetscCall(PetscFree(mmdata->off[0]));
7052   PetscCall(PetscFree(mmdata->off));
7053   PetscCall(PetscFree(mmdata));
7054   PetscFunctionReturn(PETSC_SUCCESS);
7055 }
7056 
7057 /* Copy selected n entries with indices in idx[] of A to v[].
7058    If idx is NULL, copy the whole data array of A to v[]
7059  */
7060 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7061 {
7062   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7063 
7064   PetscFunctionBegin;
7065   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7066   if (f) {
7067     PetscCall((*f)(A, n, idx, v));
7068   } else {
7069     const PetscScalar *vv;
7070 
7071     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7072     if (n && idx) {
7073       PetscScalar    *w  = v;
7074       const PetscInt *oi = idx;
7075       PetscInt        j;
7076 
7077       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7078     } else {
7079       PetscCall(PetscArraycpy(v, vv, n));
7080     }
7081     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7082   }
7083   PetscFunctionReturn(PETSC_SUCCESS);
7084 }
7085 
7086 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7087 {
7088   MatMatMPIAIJBACKEND *mmdata;
7089   PetscInt             i, n_d, n_o;
7090 
7091   PetscFunctionBegin;
7092   MatCheckProduct(C, 1);
7093   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7094   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7095   if (!mmdata->reusesym) { /* update temporary matrices */
7096     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7097     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7098   }
7099   mmdata->reusesym = PETSC_FALSE;
7100 
7101   for (i = 0; i < mmdata->cp; i++) {
7102     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7103     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7104   }
7105   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7106     PetscInt noff;
7107 
7108     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7109     if (mmdata->mptmp[i]) continue;
7110     if (noff) {
7111       PetscInt nown;
7112 
7113       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7114       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7115       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7116       n_o += noff;
7117       n_d += nown;
7118     } else {
7119       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7120 
7121       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7122       n_d += mm->nz;
7123     }
7124   }
7125   if (mmdata->hasoffproc) { /* offprocess insertion */
7126     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7127     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7128   }
7129   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7130   PetscFunctionReturn(PETSC_SUCCESS);
7131 }
7132 
7133 /* Support for Pt * A, A * P, or Pt * A * P */
7134 #define MAX_NUMBER_INTERMEDIATE 4
7135 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7136 {
7137   Mat_Product           *product = C->product;
7138   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7139   Mat_MPIAIJ            *a, *p;
7140   MatMatMPIAIJBACKEND   *mmdata;
7141   ISLocalToGlobalMapping P_oth_l2g = NULL;
7142   IS                     glob      = NULL;
7143   const char            *prefix;
7144   char                   pprefix[256];
7145   const PetscInt        *globidx, *P_oth_idx;
7146   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7147   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7148   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7149                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7150                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7151   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7152 
7153   MatProductType ptype;
7154   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7155   PetscMPIInt    size;
7156 
7157   PetscFunctionBegin;
7158   MatCheckProduct(C, 1);
7159   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7160   ptype = product->type;
7161   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7162     ptype                                          = MATPRODUCT_AB;
7163     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7164   }
7165   switch (ptype) {
7166   case MATPRODUCT_AB:
7167     A          = product->A;
7168     P          = product->B;
7169     m          = A->rmap->n;
7170     n          = P->cmap->n;
7171     M          = A->rmap->N;
7172     N          = P->cmap->N;
7173     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7174     break;
7175   case MATPRODUCT_AtB:
7176     P          = product->A;
7177     A          = product->B;
7178     m          = P->cmap->n;
7179     n          = A->cmap->n;
7180     M          = P->cmap->N;
7181     N          = A->cmap->N;
7182     hasoffproc = PETSC_TRUE;
7183     break;
7184   case MATPRODUCT_PtAP:
7185     A          = product->A;
7186     P          = product->B;
7187     m          = P->cmap->n;
7188     n          = P->cmap->n;
7189     M          = P->cmap->N;
7190     N          = P->cmap->N;
7191     hasoffproc = PETSC_TRUE;
7192     break;
7193   default:
7194     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7195   }
7196   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7197   if (size == 1) hasoffproc = PETSC_FALSE;
7198 
7199   /* defaults */
7200   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7201     mp[i]    = NULL;
7202     mptmp[i] = PETSC_FALSE;
7203     rmapt[i] = -1;
7204     cmapt[i] = -1;
7205     rmapa[i] = NULL;
7206     cmapa[i] = NULL;
7207   }
7208 
7209   /* customization */
7210   PetscCall(PetscNew(&mmdata));
7211   mmdata->reusesym = product->api_user;
7212   if (ptype == MATPRODUCT_AB) {
7213     if (product->api_user) {
7214       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7215       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7216       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7217       PetscOptionsEnd();
7218     } else {
7219       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7220       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7221       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7222       PetscOptionsEnd();
7223     }
7224   } else if (ptype == MATPRODUCT_PtAP) {
7225     if (product->api_user) {
7226       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7227       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7228       PetscOptionsEnd();
7229     } else {
7230       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7231       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7232       PetscOptionsEnd();
7233     }
7234   }
7235   a = (Mat_MPIAIJ *)A->data;
7236   p = (Mat_MPIAIJ *)P->data;
7237   PetscCall(MatSetSizes(C, m, n, M, N));
7238   PetscCall(PetscLayoutSetUp(C->rmap));
7239   PetscCall(PetscLayoutSetUp(C->cmap));
7240   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7241   PetscCall(MatGetOptionsPrefix(C, &prefix));
7242 
7243   cp = 0;
7244   switch (ptype) {
7245   case MATPRODUCT_AB: /* A * P */
7246     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7247 
7248     /* A_diag * P_local (merged or not) */
7249     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7250       /* P is product->B */
7251       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7252       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7253       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7254       PetscCall(MatProductSetFill(mp[cp], product->fill));
7255       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7256       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7257       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7258       mp[cp]->product->api_user = product->api_user;
7259       PetscCall(MatProductSetFromOptions(mp[cp]));
7260       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7261       PetscCall(ISGetIndices(glob, &globidx));
7262       rmapt[cp] = 1;
7263       cmapt[cp] = 2;
7264       cmapa[cp] = globidx;
7265       mptmp[cp] = PETSC_FALSE;
7266       cp++;
7267     } else { /* A_diag * P_diag and A_diag * P_off */
7268       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7269       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7270       PetscCall(MatProductSetFill(mp[cp], product->fill));
7271       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7272       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7273       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7274       mp[cp]->product->api_user = product->api_user;
7275       PetscCall(MatProductSetFromOptions(mp[cp]));
7276       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7277       rmapt[cp] = 1;
7278       cmapt[cp] = 1;
7279       mptmp[cp] = PETSC_FALSE;
7280       cp++;
7281       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7282       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7283       PetscCall(MatProductSetFill(mp[cp], product->fill));
7284       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7285       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7286       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7287       mp[cp]->product->api_user = product->api_user;
7288       PetscCall(MatProductSetFromOptions(mp[cp]));
7289       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7290       rmapt[cp] = 1;
7291       cmapt[cp] = 2;
7292       cmapa[cp] = p->garray;
7293       mptmp[cp] = PETSC_FALSE;
7294       cp++;
7295     }
7296 
7297     /* A_off * P_other */
7298     if (mmdata->P_oth) {
7299       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7300       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7301       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7302       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7303       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7304       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7305       PetscCall(MatProductSetFill(mp[cp], product->fill));
7306       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7307       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7308       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7309       mp[cp]->product->api_user = product->api_user;
7310       PetscCall(MatProductSetFromOptions(mp[cp]));
7311       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7312       rmapt[cp] = 1;
7313       cmapt[cp] = 2;
7314       cmapa[cp] = P_oth_idx;
7315       mptmp[cp] = PETSC_FALSE;
7316       cp++;
7317     }
7318     break;
7319 
7320   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7321     /* A is product->B */
7322     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7323     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7324       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7325       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7326       PetscCall(MatProductSetFill(mp[cp], product->fill));
7327       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7328       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7329       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7330       mp[cp]->product->api_user = product->api_user;
7331       PetscCall(MatProductSetFromOptions(mp[cp]));
7332       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7333       PetscCall(ISGetIndices(glob, &globidx));
7334       rmapt[cp] = 2;
7335       rmapa[cp] = globidx;
7336       cmapt[cp] = 2;
7337       cmapa[cp] = globidx;
7338       mptmp[cp] = PETSC_FALSE;
7339       cp++;
7340     } else {
7341       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7342       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7343       PetscCall(MatProductSetFill(mp[cp], product->fill));
7344       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7345       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7346       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7347       mp[cp]->product->api_user = product->api_user;
7348       PetscCall(MatProductSetFromOptions(mp[cp]));
7349       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7350       PetscCall(ISGetIndices(glob, &globidx));
7351       rmapt[cp] = 1;
7352       cmapt[cp] = 2;
7353       cmapa[cp] = globidx;
7354       mptmp[cp] = PETSC_FALSE;
7355       cp++;
7356       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7357       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7358       PetscCall(MatProductSetFill(mp[cp], product->fill));
7359       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7360       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7361       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7362       mp[cp]->product->api_user = product->api_user;
7363       PetscCall(MatProductSetFromOptions(mp[cp]));
7364       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7365       rmapt[cp] = 2;
7366       rmapa[cp] = p->garray;
7367       cmapt[cp] = 2;
7368       cmapa[cp] = globidx;
7369       mptmp[cp] = PETSC_FALSE;
7370       cp++;
7371     }
7372     break;
7373   case MATPRODUCT_PtAP:
7374     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7375     /* P is product->B */
7376     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7377     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7378     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7379     PetscCall(MatProductSetFill(mp[cp], product->fill));
7380     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7381     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7382     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7383     mp[cp]->product->api_user = product->api_user;
7384     PetscCall(MatProductSetFromOptions(mp[cp]));
7385     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7386     PetscCall(ISGetIndices(glob, &globidx));
7387     rmapt[cp] = 2;
7388     rmapa[cp] = globidx;
7389     cmapt[cp] = 2;
7390     cmapa[cp] = globidx;
7391     mptmp[cp] = PETSC_FALSE;
7392     cp++;
7393     if (mmdata->P_oth) {
7394       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7395       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7396       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7397       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7398       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7399       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7400       PetscCall(MatProductSetFill(mp[cp], product->fill));
7401       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7402       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7403       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7404       mp[cp]->product->api_user = product->api_user;
7405       PetscCall(MatProductSetFromOptions(mp[cp]));
7406       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7407       mptmp[cp] = PETSC_TRUE;
7408       cp++;
7409       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7410       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7411       PetscCall(MatProductSetFill(mp[cp], product->fill));
7412       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7413       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7414       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7415       mp[cp]->product->api_user = product->api_user;
7416       PetscCall(MatProductSetFromOptions(mp[cp]));
7417       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7418       rmapt[cp] = 2;
7419       rmapa[cp] = globidx;
7420       cmapt[cp] = 2;
7421       cmapa[cp] = P_oth_idx;
7422       mptmp[cp] = PETSC_FALSE;
7423       cp++;
7424     }
7425     break;
7426   default:
7427     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7428   }
7429   /* sanity check */
7430   if (size > 1)
7431     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7432 
7433   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7434   for (i = 0; i < cp; i++) {
7435     mmdata->mp[i]    = mp[i];
7436     mmdata->mptmp[i] = mptmp[i];
7437   }
7438   mmdata->cp             = cp;
7439   C->product->data       = mmdata;
7440   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7441   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7442 
7443   /* memory type */
7444   mmdata->mtype = PETSC_MEMTYPE_HOST;
7445   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7446   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7447   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7448   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7449   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7450   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7451 
7452   /* prepare coo coordinates for values insertion */
7453 
7454   /* count total nonzeros of those intermediate seqaij Mats
7455     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7456     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7457     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7458   */
7459   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7460     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7461     if (mptmp[cp]) continue;
7462     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7463       const PetscInt *rmap = rmapa[cp];
7464       const PetscInt  mr   = mp[cp]->rmap->n;
7465       const PetscInt  rs   = C->rmap->rstart;
7466       const PetscInt  re   = C->rmap->rend;
7467       const PetscInt *ii   = mm->i;
7468       for (i = 0; i < mr; i++) {
7469         const PetscInt gr = rmap[i];
7470         const PetscInt nz = ii[i + 1] - ii[i];
7471         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7472         else ncoo_oown += nz;                  /* this row is local */
7473       }
7474     } else ncoo_d += mm->nz;
7475   }
7476 
7477   /*
7478     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7479 
7480     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7481 
7482     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7483 
7484     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7485     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7486     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7487 
7488     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7489     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7490   */
7491   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7492   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7493 
7494   /* gather (i,j) of nonzeros inserted by remote procs */
7495   if (hasoffproc) {
7496     PetscSF  msf;
7497     PetscInt ncoo2, *coo_i2, *coo_j2;
7498 
7499     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7500     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7501     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7502 
7503     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7504       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7505       PetscInt   *idxoff = mmdata->off[cp];
7506       PetscInt   *idxown = mmdata->own[cp];
7507       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7508         const PetscInt *rmap = rmapa[cp];
7509         const PetscInt *cmap = cmapa[cp];
7510         const PetscInt *ii   = mm->i;
7511         PetscInt       *coi  = coo_i + ncoo_o;
7512         PetscInt       *coj  = coo_j + ncoo_o;
7513         const PetscInt  mr   = mp[cp]->rmap->n;
7514         const PetscInt  rs   = C->rmap->rstart;
7515         const PetscInt  re   = C->rmap->rend;
7516         const PetscInt  cs   = C->cmap->rstart;
7517         for (i = 0; i < mr; i++) {
7518           const PetscInt *jj = mm->j + ii[i];
7519           const PetscInt  gr = rmap[i];
7520           const PetscInt  nz = ii[i + 1] - ii[i];
7521           if (gr < rs || gr >= re) { /* this is an offproc row */
7522             for (j = ii[i]; j < ii[i + 1]; j++) {
7523               *coi++    = gr;
7524               *idxoff++ = j;
7525             }
7526             if (!cmapt[cp]) { /* already global */
7527               for (j = 0; j < nz; j++) *coj++ = jj[j];
7528             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7529               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7530             } else { /* offdiag */
7531               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7532             }
7533             ncoo_o += nz;
7534           } else { /* this is a local row */
7535             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7536           }
7537         }
7538       }
7539       mmdata->off[cp + 1] = idxoff;
7540       mmdata->own[cp + 1] = idxown;
7541     }
7542 
7543     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7544     PetscInt incoo_o;
7545     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7546     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7547     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7548     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7549     ncoo = ncoo_d + ncoo_oown + ncoo2;
7550     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7551     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7552     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7553     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7554     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7555     PetscCall(PetscFree2(coo_i, coo_j));
7556     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7557     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7558     coo_i = coo_i2;
7559     coo_j = coo_j2;
7560   } else { /* no offproc values insertion */
7561     ncoo = ncoo_d;
7562     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7563 
7564     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7565     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7566     PetscCall(PetscSFSetUp(mmdata->sf));
7567   }
7568   mmdata->hasoffproc = hasoffproc;
7569 
7570   /* gather (i,j) of nonzeros inserted locally */
7571   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7572     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7573     PetscInt       *coi  = coo_i + ncoo_d;
7574     PetscInt       *coj  = coo_j + ncoo_d;
7575     const PetscInt *jj   = mm->j;
7576     const PetscInt *ii   = mm->i;
7577     const PetscInt *cmap = cmapa[cp];
7578     const PetscInt *rmap = rmapa[cp];
7579     const PetscInt  mr   = mp[cp]->rmap->n;
7580     const PetscInt  rs   = C->rmap->rstart;
7581     const PetscInt  re   = C->rmap->rend;
7582     const PetscInt  cs   = C->cmap->rstart;
7583 
7584     if (mptmp[cp]) continue;
7585     if (rmapt[cp] == 1) { /* consecutive rows */
7586       /* fill coo_i */
7587       for (i = 0; i < mr; i++) {
7588         const PetscInt gr = i + rs;
7589         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7590       }
7591       /* fill coo_j */
7592       if (!cmapt[cp]) { /* type-0, already global */
7593         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7594       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7595         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7596       } else {                                            /* type-2, local to global for sparse columns */
7597         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7598       }
7599       ncoo_d += mm->nz;
7600     } else if (rmapt[cp] == 2) { /* sparse rows */
7601       for (i = 0; i < mr; i++) {
7602         const PetscInt *jj = mm->j + ii[i];
7603         const PetscInt  gr = rmap[i];
7604         const PetscInt  nz = ii[i + 1] - ii[i];
7605         if (gr >= rs && gr < re) { /* local rows */
7606           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7607           if (!cmapt[cp]) { /* type-0, already global */
7608             for (j = 0; j < nz; j++) *coj++ = jj[j];
7609           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7610             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7611           } else { /* type-2, local to global for sparse columns */
7612             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7613           }
7614           ncoo_d += nz;
7615         }
7616       }
7617     }
7618   }
7619   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7620   PetscCall(ISDestroy(&glob));
7621   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7622   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7623   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7624   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7625 
7626   /* set block sizes */
7627   A = product->A;
7628   P = product->B;
7629   switch (ptype) {
7630   case MATPRODUCT_PtAP:
7631     PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
7632     break;
7633   case MATPRODUCT_RARt:
7634     PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
7635     break;
7636   case MATPRODUCT_ABC:
7637     PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
7638     break;
7639   case MATPRODUCT_AB:
7640     PetscCall(MatSetBlockSizesFromMats(C, A, P));
7641     break;
7642   case MATPRODUCT_AtB:
7643     PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
7644     break;
7645   case MATPRODUCT_ABt:
7646     PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
7647     break;
7648   default:
7649     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
7650   }
7651 
7652   /* preallocate with COO data */
7653   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7654   PetscCall(PetscFree2(coo_i, coo_j));
7655   PetscFunctionReturn(PETSC_SUCCESS);
7656 }
7657 
7658 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7659 {
7660   Mat_Product *product = mat->product;
7661 #if defined(PETSC_HAVE_DEVICE)
7662   PetscBool match  = PETSC_FALSE;
7663   PetscBool usecpu = PETSC_FALSE;
7664 #else
7665   PetscBool match = PETSC_TRUE;
7666 #endif
7667 
7668   PetscFunctionBegin;
7669   MatCheckProduct(mat, 1);
7670 #if defined(PETSC_HAVE_DEVICE)
7671   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7672   if (match) { /* we can always fallback to the CPU if requested */
7673     switch (product->type) {
7674     case MATPRODUCT_AB:
7675       if (product->api_user) {
7676         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7677         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7678         PetscOptionsEnd();
7679       } else {
7680         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7681         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7682         PetscOptionsEnd();
7683       }
7684       break;
7685     case MATPRODUCT_AtB:
7686       if (product->api_user) {
7687         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7688         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7689         PetscOptionsEnd();
7690       } else {
7691         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7692         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7693         PetscOptionsEnd();
7694       }
7695       break;
7696     case MATPRODUCT_PtAP:
7697       if (product->api_user) {
7698         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7699         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7700         PetscOptionsEnd();
7701       } else {
7702         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7703         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7704         PetscOptionsEnd();
7705       }
7706       break;
7707     default:
7708       break;
7709     }
7710     match = (PetscBool)!usecpu;
7711   }
7712 #endif
7713   if (match) {
7714     switch (product->type) {
7715     case MATPRODUCT_AB:
7716     case MATPRODUCT_AtB:
7717     case MATPRODUCT_PtAP:
7718       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7719       break;
7720     default:
7721       break;
7722     }
7723   }
7724   /* fallback to MPIAIJ ops */
7725   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7726   PetscFunctionReturn(PETSC_SUCCESS);
7727 }
7728 
7729 /*
7730    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7731 
7732    n - the number of block indices in cc[]
7733    cc - the block indices (must be large enough to contain the indices)
7734 */
7735 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7736 {
7737   PetscInt        cnt = -1, nidx, j;
7738   const PetscInt *idx;
7739 
7740   PetscFunctionBegin;
7741   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7742   if (nidx) {
7743     cnt     = 0;
7744     cc[cnt] = idx[0] / bs;
7745     for (j = 1; j < nidx; j++) {
7746       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7747     }
7748   }
7749   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7750   *n = cnt + 1;
7751   PetscFunctionReturn(PETSC_SUCCESS);
7752 }
7753 
7754 /*
7755     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7756 
7757     ncollapsed - the number of block indices
7758     collapsed - the block indices (must be large enough to contain the indices)
7759 */
7760 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7761 {
7762   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7763 
7764   PetscFunctionBegin;
7765   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7766   for (i = start + 1; i < start + bs; i++) {
7767     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7768     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7769     cprevtmp = cprev;
7770     cprev    = merged;
7771     merged   = cprevtmp;
7772   }
7773   *ncollapsed = nprev;
7774   if (collapsed) *collapsed = cprev;
7775   PetscFunctionReturn(PETSC_SUCCESS);
7776 }
7777 
7778 /*
7779  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7780 
7781  Input Parameter:
7782  . Amat - matrix
7783  - symmetrize - make the result symmetric
7784  + scale - scale with diagonal
7785 
7786  Output Parameter:
7787  . a_Gmat - output scalar graph >= 0
7788 
7789 */
7790 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7791 {
7792   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7793   MPI_Comm  comm;
7794   Mat       Gmat;
7795   PetscBool ismpiaij, isseqaij;
7796   Mat       a, b, c;
7797   MatType   jtype;
7798 
7799   PetscFunctionBegin;
7800   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7801   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7802   PetscCall(MatGetSize(Amat, &MM, &NN));
7803   PetscCall(MatGetBlockSize(Amat, &bs));
7804   nloc = (Iend - Istart) / bs;
7805 
7806   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7807   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7808   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7809 
7810   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7811   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7812      implementation */
7813   if (bs > 1) {
7814     PetscCall(MatGetType(Amat, &jtype));
7815     PetscCall(MatCreate(comm, &Gmat));
7816     PetscCall(MatSetType(Gmat, jtype));
7817     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7818     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7819     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7820       PetscInt  *d_nnz, *o_nnz;
7821       MatScalar *aa, val, *AA;
7822       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7823 
7824       if (isseqaij) {
7825         a = Amat;
7826         b = NULL;
7827       } else {
7828         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7829         a             = d->A;
7830         b             = d->B;
7831       }
7832       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7833       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7834       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7835         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7836         const PetscInt *cols1, *cols2;
7837 
7838         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7839           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7840           nnz[brow / bs] = nc2 / bs;
7841           if (nc2 % bs) ok = 0;
7842           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7843           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7844             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7845             if (nc1 != nc2) ok = 0;
7846             else {
7847               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7848                 if (cols1[jj] != cols2[jj]) ok = 0;
7849                 if (cols1[jj] % bs != jj % bs) ok = 0;
7850               }
7851             }
7852             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7853           }
7854           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7855           if (!ok) {
7856             PetscCall(PetscFree2(d_nnz, o_nnz));
7857             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7858             goto old_bs;
7859           }
7860         }
7861       }
7862       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7863       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7864       PetscCall(PetscFree2(d_nnz, o_nnz));
7865       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7866       // diag
7867       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7868         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7869 
7870         ai = aseq->i;
7871         n  = ai[brow + 1] - ai[brow];
7872         aj = aseq->j + ai[brow];
7873         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7874           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7875           val        = 0;
7876           if (index_size == 0) {
7877             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7878               aa = aseq->a + ai[brow + ii] + k;
7879               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7880                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7881               }
7882             }
7883           } else {                                            // use (index,index) value if provided
7884             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7885               PetscInt ii = index[iii];
7886               aa          = aseq->a + ai[brow + ii] + k;
7887               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7888                 PetscInt jj = index[jjj];
7889                 val += PetscAbs(PetscRealPart(aa[jj]));
7890               }
7891             }
7892           }
7893           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7894           AA[k / bs] = val;
7895         }
7896         grow = Istart / bs + brow / bs;
7897         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7898       }
7899       // off-diag
7900       if (ismpiaij) {
7901         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7902         const PetscScalar *vals;
7903         const PetscInt    *cols, *garray = aij->garray;
7904 
7905         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7906         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7907           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7908           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7909             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7910             AA[k / bs] = 0;
7911             AJ[cidx]   = garray[cols[k]] / bs;
7912           }
7913           nc = ncols / bs;
7914           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7915           if (index_size == 0) {
7916             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7917               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7918               for (PetscInt k = 0; k < ncols; k += bs) {
7919                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7920                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7921                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7922                 }
7923               }
7924               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7925             }
7926           } else {                                            // use (index,index) value if provided
7927             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7928               PetscInt ii = index[iii];
7929               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7930               for (PetscInt k = 0; k < ncols; k += bs) {
7931                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7932                   PetscInt jj = index[jjj];
7933                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7934                 }
7935               }
7936               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7937             }
7938           }
7939           grow = Istart / bs + brow / bs;
7940           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7941         }
7942       }
7943       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7944       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7945       PetscCall(PetscFree2(AA, AJ));
7946     } else {
7947       const PetscScalar *vals;
7948       const PetscInt    *idx;
7949       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7950     old_bs:
7951       /*
7952        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7953        */
7954       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7955       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7956       if (isseqaij) {
7957         PetscInt max_d_nnz;
7958 
7959         /*
7960          Determine exact preallocation count for (sequential) scalar matrix
7961          */
7962         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7963         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7964         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7965         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7966         PetscCall(PetscFree3(w0, w1, w2));
7967       } else if (ismpiaij) {
7968         Mat             Daij, Oaij;
7969         const PetscInt *garray;
7970         PetscInt        max_d_nnz;
7971 
7972         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7973         /*
7974          Determine exact preallocation count for diagonal block portion of scalar matrix
7975          */
7976         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7977         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7978         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7979         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7980         PetscCall(PetscFree3(w0, w1, w2));
7981         /*
7982          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7983          */
7984         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7985           o_nnz[jj] = 0;
7986           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7987             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7988             o_nnz[jj] += ncols;
7989             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7990           }
7991           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7992         }
7993       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7994       /* get scalar copy (norms) of matrix */
7995       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7996       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7997       PetscCall(PetscFree2(d_nnz, o_nnz));
7998       for (Ii = Istart; Ii < Iend; Ii++) {
7999         PetscInt dest_row = Ii / bs;
8000 
8001         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
8002         for (jj = 0; jj < ncols; jj++) {
8003           PetscInt    dest_col = idx[jj] / bs;
8004           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
8005 
8006           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
8007         }
8008         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
8009       }
8010       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8011       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8012     }
8013   } else {
8014     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8015     else {
8016       Gmat = Amat;
8017       PetscCall(PetscObjectReference((PetscObject)Gmat));
8018     }
8019     if (isseqaij) {
8020       a = Gmat;
8021       b = NULL;
8022     } else {
8023       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8024       a             = d->A;
8025       b             = d->B;
8026     }
8027     if (filter >= 0 || scale) {
8028       /* take absolute value of each entry */
8029       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8030         MatInfo      info;
8031         PetscScalar *avals;
8032 
8033         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8034         PetscCall(MatSeqAIJGetArray(c, &avals));
8035         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8036         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8037       }
8038     }
8039   }
8040   if (symmetrize) {
8041     PetscBool isset, issym;
8042 
8043     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8044     if (!isset || !issym) {
8045       Mat matTrans;
8046 
8047       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8048       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8049       PetscCall(MatDestroy(&matTrans));
8050     }
8051     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8052   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8053   if (scale) {
8054     /* scale c for all diagonal values = 1 or -1 */
8055     Vec diag;
8056 
8057     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8058     PetscCall(MatGetDiagonal(Gmat, diag));
8059     PetscCall(VecReciprocal(diag));
8060     PetscCall(VecSqrtAbs(diag));
8061     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8062     PetscCall(VecDestroy(&diag));
8063   }
8064   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8065   if (filter >= 0) {
8066     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8067     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8068   }
8069   *a_Gmat = Gmat;
8070   PetscFunctionReturn(PETSC_SUCCESS);
8071 }
8072 
8073 /*
8074     Special version for direct calls from Fortran
8075 */
8076 
8077 /* Change these macros so can be used in void function */
8078 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8079 #undef PetscCall
8080 #define PetscCall(...) \
8081   do { \
8082     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8083     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8084       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8085       return; \
8086     } \
8087   } while (0)
8088 
8089 #undef SETERRQ
8090 #define SETERRQ(comm, ierr, ...) \
8091   do { \
8092     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8093     return; \
8094   } while (0)
8095 
8096 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8097   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8098 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8099   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8100 #else
8101 #endif
8102 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8103 {
8104   Mat         mat = *mmat;
8105   PetscInt    m = *mm, n = *mn;
8106   InsertMode  addv = *maddv;
8107   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8108   PetscScalar value;
8109 
8110   MatCheckPreallocated(mat, 1);
8111   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8112   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8113   {
8114     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8115     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8116     PetscBool roworiented = aij->roworiented;
8117 
8118     /* Some Variables required in the macro */
8119     Mat         A     = aij->A;
8120     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8121     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8122     MatScalar  *aa;
8123     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8124     Mat         B                 = aij->B;
8125     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8126     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8127     MatScalar  *ba;
8128     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8129      * cannot use "#if defined" inside a macro. */
8130     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8131 
8132     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8133     PetscInt   nonew = a->nonew;
8134     MatScalar *ap1, *ap2;
8135 
8136     PetscFunctionBegin;
8137     PetscCall(MatSeqAIJGetArray(A, &aa));
8138     PetscCall(MatSeqAIJGetArray(B, &ba));
8139     for (i = 0; i < m; i++) {
8140       if (im[i] < 0) continue;
8141       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8142       if (im[i] >= rstart && im[i] < rend) {
8143         row      = im[i] - rstart;
8144         lastcol1 = -1;
8145         rp1      = aj + ai[row];
8146         ap1      = aa + ai[row];
8147         rmax1    = aimax[row];
8148         nrow1    = ailen[row];
8149         low1     = 0;
8150         high1    = nrow1;
8151         lastcol2 = -1;
8152         rp2      = bj + bi[row];
8153         ap2      = ba + bi[row];
8154         rmax2    = bimax[row];
8155         nrow2    = bilen[row];
8156         low2     = 0;
8157         high2    = nrow2;
8158 
8159         for (j = 0; j < n; j++) {
8160           if (roworiented) value = v[i * n + j];
8161           else value = v[i + j * m];
8162           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8163           if (in[j] >= cstart && in[j] < cend) {
8164             col = in[j] - cstart;
8165             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8166           } else if (in[j] < 0) continue;
8167           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8168             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8169           } else {
8170             if (mat->was_assembled) {
8171               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8172 #if defined(PETSC_USE_CTABLE)
8173               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8174               col--;
8175 #else
8176               col = aij->colmap[in[j]] - 1;
8177 #endif
8178               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8179                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8180                 col = in[j];
8181                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8182                 B        = aij->B;
8183                 b        = (Mat_SeqAIJ *)B->data;
8184                 bimax    = b->imax;
8185                 bi       = b->i;
8186                 bilen    = b->ilen;
8187                 bj       = b->j;
8188                 rp2      = bj + bi[row];
8189                 ap2      = ba + bi[row];
8190                 rmax2    = bimax[row];
8191                 nrow2    = bilen[row];
8192                 low2     = 0;
8193                 high2    = nrow2;
8194                 bm       = aij->B->rmap->n;
8195                 ba       = b->a;
8196                 inserted = PETSC_FALSE;
8197               }
8198             } else col = in[j];
8199             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8200           }
8201         }
8202       } else if (!aij->donotstash) {
8203         if (roworiented) {
8204           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8205         } else {
8206           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8207         }
8208       }
8209     }
8210     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8211     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8212   }
8213   PetscFunctionReturnVoid();
8214 }
8215 
8216 /* Undefining these here since they were redefined from their original definition above! No
8217  * other PETSc functions should be defined past this point, as it is impossible to recover the
8218  * original definitions */
8219 #undef PetscCall
8220 #undef SETERRQ
8221