xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ac84dfd5778759083efa0c46d3820bac8a11500e)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because those are what are used to determine
43     the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140   for communicators controlling multiple processes.  It is recommended that you call both of
141   the above preallocation routines for simplicity.
142 
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; this type also automatically switches over to use inodes when
  enough exist.
151 
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
153 M*/
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity.
163 
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
/* Build an IS of the global indices of locally owned rows that contain at least one
   numerically nonzero value (explicitly stored zeros do not count).  If no process has
   an all-zero row, *keptrows is left NULL on every process. */
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* First pass: cnt counts the local rows whose stored values are all exactly zero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i]; /* stored entries of row i in the diagonal block */
    nb = ib[i + 1] - ib[i]; /* stored entries of row i in the off-diagonal block */
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* row has a nonzero value; it is kept */
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* every stored value in this row is zero */
  ok1:;
  }
  /* n0rows = global number of zero rows; when zero, all rows are kept and *keptrows stays NULL */
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* Second pass: collect the global indices of the rows that do contain a nonzero value */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* rows ownership transfers to the IS */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
/* Compute a per-column reduction (1/2/inf norm, sum or mean of real/imaginary parts)
   over the whole parallel matrix; `reductions` must hold N (global columns) reals. */
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  /* work accumulates this process's partial reduction per global column */
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): these get/restore pairs appear to exist only to force the value arrays
     a_aij->a / b_aij->a accessed below to be current (e.g. synced from a device) -- confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* Diagonal-block columns map to global via A->cmap->rstart; off-diagonal via garray */
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* Combine the partial reductions; max for the infinity norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* Post-process: square root for the 2-norm, divide by global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
346 
/* Create an IS of the global indices of local rows that have at least one entry
   outside the block diagonal: the union of such rows of the diagonal block with
   all nonzero rows of the off-diagonal block. */
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  /* sis: rows of the diagonal block with off-block-diagonal entries */
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  /* gis: rows of the off-diagonal block with any entry (those are always off the block diagonal) */
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* Concatenate the two index lists, then sort and drop duplicates to form the union */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  /* convert local row numbers to global */
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}
377 
378 /*
379   Local utility routine that creates a mapping from the global column
380 number to the local number in the off-diagonal part of the local
381 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
382 a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array) but is fast to access.
384 */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* n = number of off-diagonal (local) columns */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* Hash-table form: keys and values are shifted by one so that a 0 lookup result means "absent" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* Dense form: colmap[global column] = local column + 1, with 0 (from calloc) meaning "absent" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
401 
/*
  Insert or add `value` at local (row, col) of the diagonal block A.  A binary search
  narrows the range [low1, high1), a linear scan then locates the column; if the entry
  does not exist it is inserted, reallocating the row via MatSeqXAIJReallocateAIJ when
  needed.  Insertion is skipped for ignored zeros (off-diagonal only: row != col) and
  when nonew == 1; nonew == -1 raises an error reporting the original global indices
  (orow, ocol).  This must be a macro because it reads and updates many locals of the
  caller (rp1, ap1, low1, high1, nrow1, rmax1, lastcol1, ...).
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
446 
/*
  Counterpart of MatSetValues_SeqAIJ_A_Private for the off-diagonal block B, using the
  B-side caller locals (rp2, ap2, low2, high2, nrow2, rmax2, lastcol2, ...).  Unlike the
  A macro there is no row != col test in the ignore-zero branch -- presumably because a
  true diagonal entry never lands in the off-diagonal block (TODO confirm).
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
490 
/* Overwrite all stored values of one (global) row with v, where v lists the values in
   global column order: off-diagonal entries left of the diagonal block, then the
   diagonal block, then off-diagonal entries to its right. */
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL)); /* diag = first owned row; for a square matrix also the first diagonal-block column */
  row = row - diag;                                /* convert to local row number */
  /* l = number of off-diagonal entries of this row whose global column precedes the diagonal block */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
528 
/* Insert or add an m-by-n logically dense block of values at global rows im[] and
   columns in[].  Locally owned rows are routed to the diagonal block (A) or the
   off-diagonal block (B); rows owned by other processes are stashed for communication
   during assembly.  Negative row/column indices are ignored. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  /* search state shared with the MatSetValues_SeqAIJ_{A,B}_Private macros */
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* this process owns the row */
      row      = im[i] - rstart;
      /* initialize the per-row search state for the A (diagonal) block */
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      /* and for the B (off-diagonal) block */
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative column indices are ignored by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still stores global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* row owned by another process: stash for communication at assembly time */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
637 
638 /*
639     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
640     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
642 */
643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
644 {
645   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
646   Mat         A      = aij->A; /* diagonal part of the matrix */
647   Mat         B      = aij->B; /* off-diagonal part of the matrix */
648   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
649   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
650   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
651   PetscInt   *ailen = a->ilen, *aj = a->j;
652   PetscInt   *bilen = b->ilen, *bj = b->j;
653   PetscInt    am          = aij->A->rmap->n, j;
654   PetscInt    diag_so_far = 0, dnz;
655   PetscInt    offd_so_far = 0, onz;
656 
657   PetscFunctionBegin;
658   /* Iterate over all rows of the matrix */
659   for (j = 0; j < am; j++) {
660     dnz = onz = 0;
661     /*  Iterate over all non-zero columns of the current row */
662     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
663       /* If column is in the diagonal */
664       if (mat_j[col] >= cstart && mat_j[col] < cend) {
665         aj[diag_so_far++] = mat_j[col] - cstart;
666         dnz++;
667       } else { /* off-diagonal entries */
668         bj[offd_so_far++] = mat_j[col];
669         onz++;
670       }
671     }
672     ailen[j] = dnz;
673     bilen[j] = onz;
674   }
675   PetscFunctionReturn(PETSC_SUCCESS);
676 }
677 
678 /*
679     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
680     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
682     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
683     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
684 */
685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
686 {
687   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
688   Mat          A    = aij->A; /* diagonal part of the matrix */
689   Mat          B    = aij->B; /* off-diagonal part of the matrix */
690   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
691   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
692   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
693   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
694   PetscInt    *ailen = a->ilen, *aj = a->j;
695   PetscInt    *bilen = b->ilen, *bj = b->j;
696   PetscInt     am          = aij->A->rmap->n, j;
697   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
698   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
699   PetscScalar *aa = a->a, *ba = b->a;
700 
701   PetscFunctionBegin;
702   /* Iterate over all rows of the matrix */
703   for (j = 0; j < am; j++) {
704     dnz_row = onz_row = 0;
705     rowstart_offd     = full_offd_i[j];
706     rowstart_diag     = full_diag_i[j];
707     /*  Iterate over all non-zero columns of the current row */
708     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
709       /* If column is in the diagonal */
710       if (mat_j[col] >= cstart && mat_j[col] < cend) {
711         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
712         aa[rowstart_diag + dnz_row] = mat_a[col];
713         dnz_row++;
714       } else { /* off-diagonal entries */
715         bj[rowstart_offd + onz_row] = mat_j[col];
716         ba[rowstart_offd + onz_row] = mat_a[col];
717         onz_row++;
718       }
719     }
720     ailen[j] = dnz_row;
721     bilen[j] = onz_row;
722   }
723   PetscFunctionReturn(PETSC_SUCCESS);
724 }
725 
/*
  MatGetValues_MPIAIJ - gets a logically dense block of values, v[i*n + j] = mat(idxm[i], idxn[j])

  Only rows owned by this process may be requested (rows outside [rstart,rend) error out with
  PETSC_ERR_SUP); columns may refer to any global column. Negative row or column indices are
  skipped. Locations not present in the stored sparsity pattern are returned as 0.0.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        /* column lies in the diagonal block; shift to its local column numbering */
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* column lies in the off-diagonal block; translate global column -> local B column */
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--; /* colmap stores local index + 1 so that 0 can mean "not present" */
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* a stale or missing map entry means this column is not in the pattern: value is 0.0 */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
759 
760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
761 {
762   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
763   PetscInt    nstash, reallocs;
764 
765   PetscFunctionBegin;
766   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
767 
768   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
769   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
770   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
771   PetscFunctionReturn(PETSC_SUCCESS);
772 }
773 
/*
  MatAssemblyEnd_MPIAIJ - completes assembly of the parallel matrix: drains the stash of
  off-process entries (communication was started in MatAssemblyBegin_MPIAIJ()), assembles the
  diagonal (A) and off-diagonal (B) sequential blocks, performs collective disassembly of B when
  some rank requires it, and updates the global nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries from other ranks and insert them locally */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  /* first final assembly: build the scatter context/ghost vector used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* scratch arrays used by MatGetRow; recreated on demand */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* any cached diagonal is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
852 
853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
854 {
855   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
856 
857   PetscFunctionBegin;
858   PetscCall(MatZeroEntries(l->A));
859   PetscCall(MatZeroEntries(l->B));
860   PetscFunctionReturn(PETSC_SUCCESS);
861 }
862 
/*
  MatZeroRows_MPIAIJ - zeros all entries of the listed global rows (which may be owned by other
  ranks), optionally inserting diag on the diagonal of each zeroed row. When both x and b are
  given, b is overwritten in the zeroed rows with diag * x so the solution keeps x's values there.
*/
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong; /* true when row and column layouts match */

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry of each zeroed row lies in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved 'nonew' flags, restored after the diagonal insertions */
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rectangular case: row has no matching column */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
933 
/*
  MatZeroRowsColumns_MPIAIJ - zeros both the rows and the columns given by the (possibly
  off-process) global indices rows[], placing diag on the diagonal of the zeroed rows of the
  diagonal block. When x and b are given, b is adjusted (b_i -= a_ij * x_j for each eliminated
  column j) so the solution keeps x's values in the eliminated unknowns.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "row not zeroed" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed; any contribution leaves a nonnegative value at the owner */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[r] = r, lrows[len++] = r; /* NOTE(review): original writes lrows[len++] = r only; see line below */
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* build a 0/1 mask of eliminated columns and scatter it into the ghost (off-diagonal) ordering */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj]; /* move the eliminated column's contribution to the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1051 
1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1055   PetscInt    nt;
1056   VecScatter  Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   PetscCall(VecGetLocalSize(xx, &nt));
1060   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscUseTypeMethod(a->A, mult, xx, yy);
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1078 {
1079   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1080   VecScatter  Mvctx = a->Mvctx;
1081 
1082   PetscFunctionBegin;
1083   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1084   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1085   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1086   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1091 {
1092   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1093 
1094   PetscFunctionBegin;
1095   /* do nondiagonal part */
1096   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1097   /* do local part */
1098   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1099   /* add partial results together */
1100   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1101   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
/*
  MatIsTranspose_MPIAIJ - tests (collectively) whether Bmat equals the transpose of Amat to
  tolerance tol. First the cheap check on the diagonal blocks; only if that passes everywhere
  are the off-diagonal parts gathered with MatCreateSubMatrices for the expensive cross check.
*/
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); /* sequential: the diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* NOTE(review): notme is allocated with N - last + first entries but the second fill loop runs
     i up to M; for M != N the size and the bound disagree. Presumably this routine is only
     reached with M == N -- confirm before relying on it for rectangular matrices. */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  /* compare A(me, notme) with B(notme, me)^T */
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1144 
1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1146 {
1147   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1148 
1149   PetscFunctionBegin;
1150   /* do nondiagonal part */
1151   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1152   /* do local part */
1153   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1154   /* add partial results together */
1155   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1157   PetscFunctionReturn(PETSC_SUCCESS);
1158 }
1159 
1160 /*
1161   This only works correctly for square matrices where the subblock A->A is the
1162    diagonal block
1163 */
1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1165 {
1166   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1167 
1168   PetscFunctionBegin;
1169   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1170   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1171   PetscCall(MatGetDiagonal(a->A, v));
1172   PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174 
1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1176 {
1177   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(MatScale(a->A, aa));
1181   PetscCall(MatScale(a->B, aa));
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184 
1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1186 {
1187   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1188   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1189   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1190   const PetscInt    *garray = aij->garray;
1191   const PetscScalar *aa, *ba;
1192   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1193   PetscInt64         nz, hnz;
1194   PetscInt          *rowlens;
1195   PetscInt          *colidxs;
1196   PetscScalar       *matvals;
1197   PetscMPIInt        rank;
1198 
1199   PetscFunctionBegin;
1200   PetscCall(PetscViewerSetUp(viewer));
1201 
1202   M  = mat->rmap->N;
1203   N  = mat->cmap->N;
1204   m  = mat->rmap->n;
1205   rs = mat->rmap->rstart;
1206   cs = mat->cmap->rstart;
1207   nz = A->nz + B->nz;
1208 
1209   /* write matrix header */
1210   header[0] = MAT_FILE_CLASSID;
1211   header[1] = M;
1212   header[2] = N;
1213   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1214   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1215   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1216   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1217 
1218   /* fill in and store row lengths  */
1219   PetscCall(PetscMalloc1(m, &rowlens));
1220   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1221   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1222   PetscCall(PetscFree(rowlens));
1223 
1224   /* fill in and store column indices */
1225   PetscCall(PetscMalloc1(nz, &colidxs));
1226   for (cnt = 0, i = 0; i < m; i++) {
1227     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1228       if (garray[B->j[jb]] > cs) break;
1229       colidxs[cnt++] = garray[B->j[jb]];
1230     }
1231     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1232     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1233   }
1234   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1235   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1236   PetscCall(PetscFree(colidxs));
1237 
1238   /* fill in and store nonzero values */
1239   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1240   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1241   PetscCall(PetscMalloc1(nz, &matvals));
1242   for (cnt = 0, i = 0; i < m; i++) {
1243     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       matvals[cnt++] = ba[jb];
1246     }
1247     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1248     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1249   }
1250   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1251   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1252   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1254   PetscCall(PetscFree(matvals));
1255 
1256   /* write block size option to the viewer's .info file */
1257   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1258   PetscFunctionReturn(PETSC_SUCCESS);
1259 }
1260 
1261 #include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - viewer dispatch for ASCII, draw, binary and socket
  viewers. Special ASCII formats (load balance, info, info-detail) and the parallel binary
  path return early; all remaining cases gather the whole matrix onto rank 0 and view it there.
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory/inode statistics plus the VecScatter used by MatMult */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     info.memory));
      } else {
        PetscCall(
          PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    /* default ASCII formats fall through to the gather-on-rank-0 path below */
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch appears unreachable -- an ASCII viewer is already handled by
       the if (iascii) case above; confirm before removing */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns, every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1386 
1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1388 {
1389   PetscBool iascii, isdraw, issocket, isbinary;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1395   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1396   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
/* SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

   Only "local" variants are supported: each process relaxes within its diagonal
   block A, and the coupling through the off-diagonal block B is folded into the
   right-hand side (bb1 = bb - B*x_ghost) before each local sweep. A truly
   parallel SOR (coupling across processes within a single sweep) is not
   implemented and raises PETSC_ERR_SUP. */
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector holding the corrected rhs bb - B*x; only allocated when needed */
  PetscBool   hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER acts only on the local diagonal block; no ghost update needed */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed whenever a sweep will run with a nonzero initial guess
     (its > 1, or SOR_ZERO_INITIAL_GUESS not set) or for Eisenstat's trick */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* First iteration with a zero guess needs no rhs correction (B*x = 0) */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate into mat->lvec */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    /* Eisenstat's trick: one backward sweep, rhs correction with the scaled
       diagonal, then one forward sweep; the two half-iterates are summed */
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily build and cache the diagonal needed for the correction term */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/factorization error detected in the local sweeps */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1496 
/* Permutes rows and columns of a parallel AIJ matrix: *B = P_row * A * P_col.

   The inverse permutations are computed with star forests (PetscSF) so each
   process learns the new global index of its rows (rdest), owned columns
   (cdest), and ghost columns (gcdest). Preallocation counts are computed on the
   owner of the ORIGINAL row and broadcast to the owner of the PERMUTED row via
   rowsf before values are inserted with MatSetValues(). */
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is scratch for both row and column passes, hence sized max(m,n) */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros of each permuted row (dnnz/onnz),
     classified by whether the permuted column lands on the row's new owner */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the processes that own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never set in this function, so this destroy is
     currently dead code — possibly a leftover from an earlier refactor */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1602 
1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1604 {
1605   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1606 
1607   PetscFunctionBegin;
1608   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1609   if (ghosts) *ghosts = aij->garray;
1610   PetscFunctionReturn(PETSC_SUCCESS);
1611 }
1612 
1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1614 {
1615   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1616   Mat            A = mat->A, B = mat->B;
1617   PetscLogDouble isend[5], irecv[5];
1618 
1619   PetscFunctionBegin;
1620   info->block_size = 1.0;
1621   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1622 
1623   isend[0] = info->nz_used;
1624   isend[1] = info->nz_allocated;
1625   isend[2] = info->nz_unneeded;
1626   isend[3] = info->memory;
1627   isend[4] = info->mallocs;
1628 
1629   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1630 
1631   isend[0] += info->nz_used;
1632   isend[1] += info->nz_allocated;
1633   isend[2] += info->nz_unneeded;
1634   isend[3] += info->memory;
1635   isend[4] += info->mallocs;
1636   if (flag == MAT_LOCAL) {
1637     info->nz_used      = isend[0];
1638     info->nz_allocated = isend[1];
1639     info->nz_unneeded  = isend[2];
1640     info->memory       = isend[3];
1641     info->mallocs      = isend[4];
1642   } else if (flag == MAT_GLOBAL_MAX) {
1643     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1644 
1645     info->nz_used      = irecv[0];
1646     info->nz_allocated = irecv[1];
1647     info->nz_unneeded  = irecv[2];
1648     info->memory       = irecv[3];
1649     info->mallocs      = irecv[4];
1650   } else if (flag == MAT_GLOBAL_SUM) {
1651     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1652 
1653     info->nz_used      = irecv[0];
1654     info->nz_allocated = irecv[1];
1655     info->nz_unneeded  = irecv[2];
1656     info->memory       = irecv[3];
1657     info->mallocs      = irecv[4];
1658   }
1659   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1660   info->fill_ratio_needed = 0;
1661   info->factor_mallocs    = 0;
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
/* Sets an option on an MPIAIJ matrix, forwarding it to the sequential diagonal
   (A) and off-diagonal (B) blocks where appropriate. Unrecognized options are
   silently ignored (handled, if at all, by the generic MatSetOption()). */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* These nonzero-structure options apply identically to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* also recorded on the parallel wrapper itself */

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg; /* off-process entries are dropped instead of stashed */
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1712 
/* Returns one locally-owned row of an MPIAIJ matrix in global column numbering.

   The row is assembled by merging the diagonal-block (A) entries with the
   off-diagonal-block (B) entries. Since B's columns are stored compressed and
   remapped through garray, the merged row is built in the persistent
   rowvalues/rowindices buffers in increasing global column order:
   [B-entries with global column < cstart | all A-entries | remaining B-entries].
   Assumes the rows of A and B are individually sorted by column.
   The returned idx/v pointers are borrowed; MatRestoreRow_MPIAIJ() must be
   called before the next MatGetRow(). */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* pass NULL for the pieces the caller did not request, except that the B
     columns are still needed for merging when values are requested */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1796 
1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1798 {
1799   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1800 
1801   PetscFunctionBegin;
1802   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1803   aij->getrowactive = PETSC_FALSE;
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
/* Computes matrix norms for MPIAIJ matrices.

   Supported: NORM_FROBENIUS (sum of |a_ij|^2 reduced with MPI_SUM),
   NORM_1 (max column sum, via a global column accumulator of length cmap->N),
   and NORM_INFINITY (max row sum reduced with MPI_MAX). NORM_2 is unsupported.
   On a single process the computation is delegated to the sequential block. */
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* sequential case: B is empty, A is the whole matrix */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* accumulate |a_ij|^2 over both local blocks, then reduce and take sqrt */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp;
      PetscInt  *jj, *garray = aij->garray;
      /* tmp accumulates per-global-column absolute sums; A columns are shifted
         by cstart, B columns are mapped through garray */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp[j] > *norm) *norm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        /* PetscSafePointerPlusOffset guards against NULL array + 0 offset when a block is empty */
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1882 
/* Transposes a parallel AIJ matrix.

   The diagonal block is transposed locally (fast, no communication). The
   off-diagonal block's entries are inserted via MatSetValues() using global
   indices (one column of B^T per local row of B) and routed to their owners
   during assembly. For MAT_INITIAL_MATRIX (or in-place) the preallocation of
   the result is computed first, with a PetscSF reducing each process's ghost
   column counts onto the owning processes. */
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* the transpose has swapped row/column layouts and block sizes */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    /* reuse path: pattern must match, so new allocations are an error */
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate B's compressed column indices to global indices up front */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* insert row i of B as column (rstart+i) of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: move B's guts into A and discard the shell */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1976 
/* Computes mat = diag(ll) * mat * diag(rr) for an MPIAIJ matrix.

   The right-scaling of the off-diagonal block needs the ghost entries of rr,
   so the scatter for those is started first and completed only after all the
   work that does not depend on it, overlapping communication and computation. */
static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block; its rows use the same local numbering as ll */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2006 
2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2008 {
2009   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2010 
2011   PetscFunctionBegin;
2012   PetscCall(MatSetUnfactored(a->A));
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2017 {
2018   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2019   Mat         a, b, c, d;
2020   PetscBool   flg;
2021 
2022   PetscFunctionBegin;
2023   a = matA->A;
2024   b = matA->B;
2025   c = matB->A;
2026   d = matB->B;
2027 
2028   PetscCall(MatEqual(a, c, &flg));
2029   if (flg) PetscCall(MatEqual(b, d, &flg));
2030   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2035 {
2036   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2037   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2038 
2039   PetscFunctionBegin;
2040   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2041   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2042     /* because of the column compression in the off-processor part of the matrix a->B,
2043        the number of columns in a->B and b->B may be different, hence we cannot call
2044        the MatCopy() directly on the two parts. If need be, we can provide a more
2045        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2046        then copying the submatrices */
2047     PetscCall(MatCopy_Basic(A, B, str));
2048   } else {
2049     PetscCall(MatCopy(a->A, b->A, str));
2050     PetscCall(MatCopy(a->B, b->B, str));
2051   }
2052   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2053   PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055 
2056 /*
2057    Computes the number of nonzeros per row needed for preallocation when X and Y
2058    have different nonzero structure.
2059 */
2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2061 {
2062   PetscInt i, j, k, nzx, nzy;
2063 
2064   PetscFunctionBegin;
2065   /* Set the number of nonzeros in the new matrix */
2066   for (i = 0; i < m; i++) {
2067     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2068     nzx    = xi[i + 1] - xi[i];
2069     nzy    = yi[i + 1] - yi[i];
2070     nnz[i] = 0;
2071     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2072       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2073       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2074       nnz[i]++;
2075     }
2076     for (; k < nzy; k++) nnz[i]++;
2077   }
2078   PetscFunctionReturn(PETSC_SUCCESS);
2079 }
2080 
2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2083 {
2084   PetscInt    m = Y->rmap->N;
2085   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2086   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2087 
2088   PetscFunctionBegin;
2089   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2090   PetscFunctionReturn(PETSC_SUCCESS);
2091 }
2092 
/* Computes Y = a*X + Y for MPIAIJ matrices.

   With SAME_NONZERO_PATTERN the blocks are combined directly; with
   SUBSET_NONZERO_PATTERN the generic routine is used; otherwise a new matrix
   with the merged nonzero pattern is preallocated, filled, and its contents
   are moved into Y via MatHeaderMerge() so Y's identity is preserved. */
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* count per-row nonzeros of the union pattern for the diagonal and off-diagonal blocks */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    /* off-diagonal columns are compressed, so the garrays provide the global numbering */
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's internals with B's while keeping the user's Mat handle valid */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2123 
2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2125 
2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2127 {
2128   PetscFunctionBegin;
2129   if (PetscDefined(USE_COMPLEX)) {
2130     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2131 
2132     PetscCall(MatConjugate_SeqAIJ(aij->A));
2133     PetscCall(MatConjugate_SeqAIJ(aij->B));
2134   }
2135   PetscFunctionReturn(PETSC_SUCCESS);
2136 }
2137 
2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2139 {
2140   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2141 
2142   PetscFunctionBegin;
2143   PetscCall(MatRealPart(a->A));
2144   PetscCall(MatRealPart(a->B));
2145   PetscFunctionReturn(PETSC_SUCCESS);
2146 }
2147 
2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2149 {
2150   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2151 
2152   PetscFunctionBegin;
2153   PetscCall(MatImaginaryPart(a->A));
2154   PetscCall(MatImaginaryPart(a->B));
2155   PetscFunctionReturn(PETSC_SUCCESS);
2156 }
2157 
/* For each local row, compute the entry of maximum absolute value by combining the
   row-wise maxima of the diagonal block (a->A) and the off-diagonal block (a->B).
   If idx[] is non-NULL it receives the global column number of the winning entry;
   ties are broken in favor of the smaller global column number. */
static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  /* Row-wise maxima of the diagonal block; idx[] gets local column indices for now */
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      /* Shift nonzero winners from local to global column numbering */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* Row-wise maxima of the off-diagonal block; idxb[] holds compressed (garray) indices */
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      /* Off-diagonal block wins; garray maps its compressed column index to a global column */
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      /* On an exact tie, report the smaller global column number */
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2200 
2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2202 {
2203   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2204   Vec         vB, vA;
2205 
2206   PetscFunctionBegin;
2207   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2208   PetscCall(MatGetRowSumAbs(a->A, vA));
2209   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2210   PetscCall(MatGetRowSumAbs(a->B, vB));
2211   PetscCall(VecAXPY(vA, 1.0, vB));
2212   PetscCall(VecDestroy(&vB));
2213   PetscCall(VecCopy(vA, v));
2214   PetscCall(VecDestroy(&vA));
2215   PetscFunctionReturn(PETSC_SUCCESS);
2216 }
2217 
/* For each local row, compute the entry of minimum absolute value by combining the
   diagonal block mat->A with the off-diagonal block mat->B.  For B, implicit zero
   entries (global columns not present in the compressed cmap/garray) also take part,
   so the code must locate the first "hole" in each compressed row to report the
   column of an implicit 0.0 in idx[] (when requested). */
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate entirely to the (sequential) diagonal block,
       computing directly into v's array to avoid a copy */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: rows are treated as empty (result 0.0, index -1).
       NOTE(review): the off-diagonal block is not consulted here — presumably such
       ranks store no entries; confirm against callers. */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has at least one implicit 0.0, whose magnitude is the smallest possible */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored B entries of this row, keeping the smallest magnitude seen so far */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Combine the diagonal-block and off-diagonal-block winners; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2333 
/* For each local row, compute the minimum entry (compared by real part) by combining
   the diagonal block mat->A with the off-diagonal block mat->B.  For B, implicit zero
   entries (global columns not present in the compressed cmap/garray) also take part,
   so the code must locate the first "hole" in each compressed row to report the
   column of an implicit 0.0 in idx[] (when requested). */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate entirely to the (sequential) diagonal block,
       computing directly into v's array to avoid a copy */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: rows are treated as empty (min over empty set is +inf,
       index -1).  NOTE(review): the off-diagonal block is not consulted here —
       presumably such ranks store no entries; confirm against callers. */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has at least one implicit 0.0, hence the minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored B entries of this row, keeping the smallest value seen so far */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Combine the diagonal-block and off-diagonal-block winners; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2449 
/* For each local row, compute the maximum entry (compared by real part) by combining
   the diagonal block mat->A with the off-diagonal block mat->B.  For B, implicit zero
   entries (global columns not present in the compressed cmap/garray) also take part,
   so the code must locate the first "hole" in each compressed row to report the
   column of an implicit 0.0 in idx[] (when requested). */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate entirely to the (sequential) diagonal block,
       computing directly into v's array to avoid a copy */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This rank owns no columns: rows are treated as empty (max over empty set is -inf,
       index -1).  NOTE(review): the off-diagonal block is not consulted here —
       presumably such ranks store no entries; confirm against callers. */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored B entries of this row, keeping the largest value seen so far */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Combine the diagonal-block and off-diagonal-block winners; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2565 
2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2567 {
2568   Mat *dummy;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2572   *newmat = *dummy;
2573   PetscCall(PetscFree(dummy));
2574   PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576 
2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2578 {
2579   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2580 
2581   PetscFunctionBegin;
2582   PetscCall(MatInvertBlockDiagonal(a->A, values));
2583   A->factorerrortype = a->A->factorerrortype;
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2588 {
2589   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2590 
2591   PetscFunctionBegin;
2592   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2593   PetscCall(MatSetRandom(aij->A, rctx));
2594   if (x->assembled) {
2595     PetscCall(MatSetRandom(aij->B, rctx));
2596   } else {
2597     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2598   }
2599   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2600   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2601   PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603 
2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2605 {
2606   PetscFunctionBegin;
2607   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2608   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2609   PetscFunctionReturn(PETSC_SUCCESS);
2610 }
2611 
2612 /*@
2613   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2614 
2615   Not Collective
2616 
2617   Input Parameter:
2618 . A - the matrix
2619 
2620   Output Parameter:
2621 . nz - the number of nonzeros
2622 
2623   Level: advanced
2624 
2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2626 @*/
2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2628 {
2629   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2630   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2631   PetscBool   isaij;
2632 
2633   PetscFunctionBegin;
2634   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2635   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2636   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2637   PetscFunctionReturn(PETSC_SUCCESS);
2638 }
2639 
/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A  - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation registered on this matrix type; silently no-op otherwise */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2659 
2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject)
2661 {
2662   PetscBool sc = PETSC_FALSE, flg;
2663 
2664   PetscFunctionBegin;
2665   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2666   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2667   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2668   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2669   PetscOptionsHeadEnd();
2670   PetscFunctionReturn(PETSC_SUCCESS);
2671 }
2672 
2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2674 {
2675   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2676   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2677 
2678   PetscFunctionBegin;
2679   if (!Y->preallocated) {
2680     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2681   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2682     PetscInt nonew = aij->nonew;
2683     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2684     aij->nonew = nonew;
2685   }
2686   PetscCall(MatShift_Basic(Y, a));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2696   PetscCall(MatMissingDiagonal(a->A, missing, d));
2697   if (d) {
2698     PetscInt rstart;
2699     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2700     *d += rstart;
2701   }
2702   PetscFunctionReturn(PETSC_SUCCESS);
2703 }
2704 
2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2706 {
2707   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2708 
2709   PetscFunctionBegin;
2710   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2711   PetscFunctionReturn(PETSC_SUCCESS);
2712 }
2713 
2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2715 {
2716   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2717 
2718   PetscFunctionBegin;
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2720   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
/* Function table for MATMPIAIJ.  Entries are POSITIONAL: each slot corresponds to a fixed
   member of struct _MatOps, and the numeric comments give the slot index.  A NULL entry
   means the operation falls back to a generic implementation or is unsupported for this
   type.  Do not reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};
2881 
2882 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2883 {
2884   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2885 
2886   PetscFunctionBegin;
2887   PetscCall(MatStoreValues(aij->A));
2888   PetscCall(MatStoreValues(aij->B));
2889   PetscFunctionReturn(PETSC_SUCCESS);
2890 }
2891 
2892 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2893 {
2894   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2895 
2896   PetscFunctionBegin;
2897   PetscCall(MatRetrieveValues(aij->A));
2898   PetscCall(MatRetrieveValues(aij->B));
2899   PetscFunctionReturn(PETSC_SUCCESS);
2900 }
2901 
2902 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2903 {
2904   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2905   PetscMPIInt size;
2906 
2907   PetscFunctionBegin;
2908   if (B->hash_active) {
2909     B->ops[0]      = b->cops;
2910     B->hash_active = PETSC_FALSE;
2911   }
2912   PetscCall(PetscLayoutSetUp(B->rmap));
2913   PetscCall(PetscLayoutSetUp(B->cmap));
2914 
2915 #if defined(PETSC_USE_CTABLE)
2916   PetscCall(PetscHMapIDestroy(&b->colmap));
2917 #else
2918   PetscCall(PetscFree(b->colmap));
2919 #endif
2920   PetscCall(PetscFree(b->garray));
2921   PetscCall(VecDestroy(&b->lvec));
2922   PetscCall(VecScatterDestroy(&b->Mvctx));
2923 
2924   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2925 
2926   MatSeqXAIJGetOptions_Private(b->B);
2927   PetscCall(MatDestroy(&b->B));
2928   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2929   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2930   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2931   PetscCall(MatSetType(b->B, MATSEQAIJ));
2932   MatSeqXAIJRestoreOptions_Private(b->B);
2933 
2934   MatSeqXAIJGetOptions_Private(b->A);
2935   PetscCall(MatDestroy(&b->A));
2936   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2937   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2938   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2939   PetscCall(MatSetType(b->A, MATSEQAIJ));
2940   MatSeqXAIJRestoreOptions_Private(b->A);
2941 
2942   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2943   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2944   B->preallocated  = PETSC_TRUE;
2945   B->was_assembled = PETSC_FALSE;
2946   B->assembled     = PETSC_FALSE;
2947   PetscFunctionReturn(PETSC_SUCCESS);
2948 }
2949 
2950 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2951 {
2952   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2953   PetscBool   ondiagreset, offdiagreset, memoryreset;
2954 
2955   PetscFunctionBegin;
2956   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2957   PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()");
2958   if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS);
2959 
2960   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset));
2961   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset));
2962   memoryreset = (PetscBool)(ondiagreset || offdiagreset);
2963   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B)));
2964   if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS);
2965 
2966   PetscCall(PetscLayoutSetUp(B->rmap));
2967   PetscCall(PetscLayoutSetUp(B->cmap));
2968   PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled");
2969   PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2970   PetscCall(VecScatterDestroy(&b->Mvctx));
2971 
2972   B->preallocated  = PETSC_TRUE;
2973   B->was_assembled = PETSC_FALSE;
2974   B->assembled     = PETSC_FALSE;
2975   /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
2976   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2977   PetscFunctionReturn(PETSC_SUCCESS);
2978 }
2979 
2980 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2981 {
2982   Mat         mat;
2983   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2984 
2985   PetscFunctionBegin;
2986   *newmat = NULL;
2987   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2988   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2989   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2990   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2991   a = (Mat_MPIAIJ *)mat->data;
2992 
2993   mat->factortype = matin->factortype;
2994   mat->assembled  = matin->assembled;
2995   mat->insertmode = NOT_SET_VALUES;
2996 
2997   a->size         = oldmat->size;
2998   a->rank         = oldmat->rank;
2999   a->donotstash   = oldmat->donotstash;
3000   a->roworiented  = oldmat->roworiented;
3001   a->rowindices   = NULL;
3002   a->rowvalues    = NULL;
3003   a->getrowactive = PETSC_FALSE;
3004 
3005   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3006   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3007   if (matin->hash_active) {
3008     PetscCall(MatSetUp(mat));
3009   } else {
3010     mat->preallocated = matin->preallocated;
3011     if (oldmat->colmap) {
3012 #if defined(PETSC_USE_CTABLE)
3013       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3014 #else
3015       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3016       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3017 #endif
3018     } else a->colmap = NULL;
3019     if (oldmat->garray) {
3020       PetscInt len;
3021       len = oldmat->B->cmap->n;
3022       PetscCall(PetscMalloc1(len + 1, &a->garray));
3023       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3024     } else a->garray = NULL;
3025 
3026     /* It may happen MatDuplicate is called with a non-assembled matrix
3027       In fact, MatDuplicate only requires the matrix to be preallocated
3028       This may happen inside a DMCreateMatrix_Shell */
3029     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3030     if (oldmat->Mvctx) {
3031       a->Mvctx = oldmat->Mvctx;
3032       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3033     }
3034     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3035     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3036   }
3037   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3038   *newmat = mat;
3039   PetscFunctionReturn(PETSC_SUCCESS);
3040 }
3041 
3042 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3043 {
3044   PetscBool isbinary, ishdf5;
3045 
3046   PetscFunctionBegin;
3047   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3048   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3049   /* force binary viewer to load .info file if it has not yet done so */
3050   PetscCall(PetscViewerSetUp(viewer));
3051   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3052   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3053   if (isbinary) {
3054     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3055   } else if (ishdf5) {
3056 #if defined(PETSC_HAVE_HDF5)
3057     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3058 #else
3059     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3060 #endif
3061   } else {
3062     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3063   }
3064   PetscFunctionReturn(PETSC_SUCCESS);
3065 }
3066 
3067 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3068 {
3069   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3070   PetscInt    *rowidxs, *colidxs;
3071   PetscScalar *matvals;
3072 
3073   PetscFunctionBegin;
3074   PetscCall(PetscViewerSetUp(viewer));
3075 
3076   /* read in matrix header */
3077   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3078   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3079   M  = header[1];
3080   N  = header[2];
3081   nz = header[3];
3082   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3083   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3084   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3085 
3086   /* set block sizes from the viewer's .info file */
3087   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3088   /* set global sizes if not set already */
3089   if (mat->rmap->N < 0) mat->rmap->N = M;
3090   if (mat->cmap->N < 0) mat->cmap->N = N;
3091   PetscCall(PetscLayoutSetUp(mat->rmap));
3092   PetscCall(PetscLayoutSetUp(mat->cmap));
3093 
3094   /* check if the matrix sizes are correct */
3095   PetscCall(MatGetSize(mat, &rows, &cols));
3096   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3097 
3098   /* read in row lengths and build row indices */
3099   PetscCall(MatGetLocalSize(mat, &m, NULL));
3100   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3101   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3102   rowidxs[0] = 0;
3103   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3104   if (nz != PETSC_INT_MAX) {
3105     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3106     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3107   }
3108 
3109   /* read in column indices and matrix values */
3110   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3111   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3112   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3113   /* store matrix indices and values */
3114   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3115   PetscCall(PetscFree(rowidxs));
3116   PetscCall(PetscFree2(colidxs, matvals));
3117   PetscFunctionReturn(PETSC_SUCCESS);
3118 }
3119 
3120 /* Not scalable because of ISAllGather() unless getting all columns. */
3121 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3122 {
3123   IS          iscol_local;
3124   PetscBool   isstride;
3125   PetscMPIInt gisstride = 0;
3126 
3127   PetscFunctionBegin;
3128   /* check if we are grabbing all columns*/
3129   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3130 
3131   if (isstride) {
3132     PetscInt start, len, mstart, mlen;
3133     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3134     PetscCall(ISGetLocalSize(iscol, &len));
3135     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3136     if (mstart == start && mlen - mstart == len) gisstride = 1;
3137   }
3138 
3139   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3140   if (gisstride) {
3141     PetscInt N;
3142     PetscCall(MatGetSize(mat, NULL, &N));
3143     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3144     PetscCall(ISSetIdentity(iscol_local));
3145     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3146   } else {
3147     PetscInt cbs;
3148     PetscCall(ISGetBlockSize(iscol, &cbs));
3149     PetscCall(ISAllGather(iscol, &iscol_local));
3150     PetscCall(ISSetBlockSize(iscol_local, cbs));
3151   }
3152 
3153   *isseq = iscol_local;
3154   PetscFunctionReturn(PETSC_SUCCESS);
3155 }
3156 
3157 /*
3158  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3159  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3160 
3161  Input Parameters:
3162 +   mat - matrix
.   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3164            i.e., mat->rstart <= isrow[i] < mat->rend
3165 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3166            i.e., mat->cstart <= iscol[i] < mat->cend
3167 
3168  Output Parameters:
3169 +   isrow_d - sequential row index set for retrieving mat->A
.   iscol_d - sequential column index set for retrieving mat->A
3171 .   iscol_o - sequential column index set for retrieving mat->B
3172 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3173  */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
{
  Vec             x, cmap;     /* full-length column vectors marking the selected columns */
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;              /* off-diagonal block */
  Vec             lvec = a->lvec, lcmap;    /* ghost-column work vectors */
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;         /* the scatter used by MatMult to fill lvec */

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x;
     entries left at -1 mean "column not selected" */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum gives the global offset of this rank's iscol entries */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i]; /* mark column as selected (>= 0) */
    cmaparray[is_idx[i] - cstart] = i + isstart;            /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart;     /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d: local (diagonal-block) column indices; ownership of idx transfers to the IS */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: local row indices, shifted by this rank's row start */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: a ghost column of B is selected iff its scattered marker is >= 0 */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1)); /* sized Bn, only first `count` entries used; caller compacts */

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller takes ownership of cmap1 */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3270 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat.
   On MAT_INITIAL_MATRIX the sequential index sets isrow_d/iscol_d/iscol_o are built, used to extract
   the diagonal and off-diagonal sub-blocks, and composed with the result so a later
   MAT_REUSE_MATRIX call can repeat the extraction without rebuilding them. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o; /* sequential ISs addressing a->A (diag) and a->B (off-diag) */
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n, count, M_size, N_size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed during the initial call) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscInt *garray, *garray_compact;
    PetscInt  BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    // Compact garray so its not of size Bn (only the first `count` entries are meaningful)
    PetscCall(ISGetSize(iscol_o, &count));
    PetscCall(PetscMalloc1(count, &garray_compact));
    PetscCall(PetscArraycpy(garray_compact, garray, count));

    /* Create submatrix M; garray_compact ownership passes to M */
    PetscCall(ISGetSize(isrow, &M_size));
    PetscCall(ISGetSize(iscol, &N_size));
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* keep only the iscol_o entries whose global column survives in the condensed Bsub */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3370 
/* Extracts a parallel submatrix. Dispatches to specialized paths when isrow (and possibly
   iscol) share the matrix's row/column distribution, falling back to the nonscalable
   all-gather implementation otherwise. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; /* [0]=rows, [1]=cols */
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* The composed objects on *newmat identify which path built it originally */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices inside the ownership range implies same distribution */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the specialized paths require the property to hold on every rank */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* compose iscol_local with newmat so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3470 
3471 /*@C
3472   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3473   and "off-diagonal" part of the matrix in CSR format.
3474 
3475   Collective
3476 
3477   Input Parameters:
3478 + comm   - MPI communicator
3479 . M      - the global row size
3480 . N      - the global column size
3481 . A      - "diagonal" portion of matrix
3482 . B      - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray
3483 - garray - either `NULL` or the global index of `B` columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter.
3484 
3485   Output Parameter:
3486 . mat - the matrix, with input `A` as its local diagonal matrix
3487 
3488   Level: advanced
3489 
3490   Notes:
3491   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3492 
  `A` and `B` become part of the output mat. The user cannot use `A` and `B` anymore.
3494 
3495   If `garray` is `NULL`, `B` will be compacted to use local indices. In this sense, `B`'s sparsity pattern (nonzerostate) will be changed. If `B` is a device matrix, we need to somehow also update
3496   `B`'s copy on device.  We do so by increasing `B`'s nonzerostate. In use of `B` on device, device matrix types should detect this change (ref. internal routines `MatSeqAIJCUSPARSECopyToGPU()` or
3497   `MatAssemblyEnd_SeqAIJKokkos()`) and will just destroy and then recreate the device copy of `B`. It is not optimal, but is easy to implement and less hacky. To avoid this overhead, try to compute `garray`
3498   yourself, see algorithms in the private function `MatSetUpMultiply_MPIAIJ()`.
3499 
3500   The `NULL`-ness of `garray` doesn't need to be collective, in other words, `garray` can be `NULL` on some processes while not on others.
3501 
3502 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3503 @*/
3504 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
3505 {
3506   PetscInt m, n;
3507   MatType  mpi_mat_type;
3508 
3509   PetscFunctionBegin;
3510   PetscCall(MatCreate(comm, mat));
3511   PetscCall(MatGetSize(A, &m, &n));
3512   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3513   PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3514 
3515   PetscCall(MatSetSizes(*mat, m, n, M, N));
3516   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3517   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3518   PetscCall(MatSetType(*mat, mpi_mat_type));
3519 
3520   PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3521 
3522   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3523   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3524   if (!garray) {
3525     const PetscScalar *ba;
3526 
3527     B->nonzerostate++;
3528     PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */
3529     PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
3530   }
3531   PetscCall(MatSetMPIAIJWithSplitSeqAIJ(*mat, A, B, garray));
3532   PetscFunctionReturn(PETSC_SUCCESS);
3533 }
3534 
3535 /*
3536   MatSetMPIAIJWithSplitSeqAIJ - Set the diag and offdiag matrices of a `MATMPIAIJ` matrix.
3537    It is similar to `MatCreateMPIAIJWithSplitArrays()`. This routine allows passing in
3538    B with local indices and the correct size, along with the accompanying
3539    garray, hence skipping compactification
3540 
3541   Collective
3542 
3543   Input Parameters:
3544 +  mat    - the MATMPIAIJ matrix, which should have its type and layout set, but should not have its diag, offdiag matrices set
3545 .  A      - the diag matrix using local col ids
3546 .  B      - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray
3547 -  garray - either `NULL` or the global index of `B` columns
3548 
3549   Output Parameter:
3550 .  mat   - the updated `MATMPIAIJ` matrix
3551 
3552   Level: advanced
3553 
3554   Notes:
3555   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3556 
3557   `A` and `B` become part of output mat. The user cannot use `A` and `B` anymore.
3558 
3559 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3560 */
3561 PETSC_INTERN PetscErrorCode MatSetMPIAIJWithSplitSeqAIJ(Mat mat, Mat A, Mat B, PetscInt *garray)
3562 {
3563   PetscFunctionBegin;
3564   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
3565   PetscInt    m, n, M, N, Am, An, Bm, Bn;
3566 
3567   PetscCall(MatGetSize(mat, &M, &N));
3568   PetscCall(MatGetLocalSize(mat, &m, &n));
3569   PetscCall(MatGetLocalSize(A, &Am, &An));
3570   PetscCall(MatGetLocalSize(B, &Bm, &Bn));
3571 
3572   PetscCheck(m == Am && m == Bm, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of rows do not match");
3573   PetscCheck(n == An, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of columns do not match");
3574   PetscCheck(!mpiaij->A && !mpiaij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "A, B of the MPIAIJ matrix are not empty");
3575   mpiaij->A      = A;
3576   mpiaij->B      = B;
3577   mpiaij->garray = garray;
3578 
3579   mat->preallocated     = PETSC_TRUE;
3580   mat->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */
3581 
3582   PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3583   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3584   /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and
3585    also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced
3586    */
3587   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3588   PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3589   PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3590   PetscFunctionReturn(PETSC_SUCCESS);
3591 }
3592 
3593 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3594 
/* Extract a parallel submatrix of mat whose rows keep mat's row distribution.
   With MAT_INITIAL_MATRIX the per-process work objects (Msub, iscol_sub, iscmap) are
   composed on *newmat so a later MAT_REUSE_MATRIX call can recycle them. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the column IS, column map, and sequential submatrix stashed by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* allcolumns must agree on all ranks, hence the logical-AND reduction */
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0; /* cursor into garray; advances monotonically because is_idx is sorted */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      /* iscol_sub: requested columns that exist on this rank (global ids); ownership of idx transfers to the IS */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      /* iscmap: for each kept column, its position in the submatrix column space */
      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)Msub->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible; the first (Ncols % size) ranks get one extra */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart, rend) column ownership */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* dlens and olens share one allocation; olens aliases the second half */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens)); /* frees olens too (shared allocation) */

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* translate Msub's local column ids to the new global column ids via cmap */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    /* composing transfers a reference; local handles are then destroyed so *newmat holds the only extra reference */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3804 
3805 /*
3806     Not great since it makes two copies of the submatrix, first an SeqAIJ
3807   in local and then by concatenating the local matrices the end result.
3808   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3809 
3810   This requires a sequential iscol with all indices.
3811 */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* allcolumns must be agreed upon collectively */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* Recover the sequential submatrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns evenly; the first (n % size) ranks get one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts yields this rank's [rstart, rend) column ownership */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* single allocation; olens aliases the second half */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens)); /* frees olens too */
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz); /* NULL-safe advance (jj may be NULL for an empty matrix) */
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    /* compose transfers a reference; the local handle is destroyed so M holds the only extra one */
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3938 
/* Type-specific implementation of MatMPIAIJSetPreallocationCSR(): preallocates B from a
   local CSR description (Ii, J, v) with global column indices, then inserts the values. */
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m       = B->rmap->n;       /* number of local rows */
  cstart  = B->cmap->rstart;  /* [cstart, cend) delimits the "diagonal" columns of this rank */
  cend    = B->cmap->rend;
  rstart  = B->rmap->rstart;
  irstart = Ii[0]; /* Ii need not start at 0; all offsets into J/v are taken relative to Ii[0] */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* Validate row lengths and (assuming sorted rows) the first/last column index of each row */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart); /* NULL-safe: J may be NULL when the matrix has no entries */
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* Count per-row nonzeros falling in the diagonal (d_nnz) vs off-diagonal (o_nnz) block */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* Insert the values row by row; columns are global indices */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
  }
  /* All entries are local by construction, so assembly needs no communication; restore the flag afterwards */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    /* assumes each row's column indices are sorted, so the < cstart entries come first */
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz; /* advance to the next row's entries (J may be NULL only when every nnz is 0) */
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4007 
4008 /*@
4009   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
4010   (the default parallel PETSc format).
4011 
4012   Collective
4013 
4014   Input Parameters:
4015 + B - the matrix
4016 . i - the indices into `j` for the start of each local row (indices start with zero)
4017 . j - the column indices for each local row (indices start with zero)
4018 - v - optional values in the matrix
4019 
4020   Level: developer
4021 
4022   Notes:
4023   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
4024   thus you CANNOT change the matrix entries by changing the values of `v` after you have
4025   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4026 
4027   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4028 
4029   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
4030 
4031   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
4032 
4033   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
4034   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4035 
4036   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
4038   as shown
4039 .vb
4040         1 0 0
4041         2 0 3     P0
4042        -------
4043         4 5 6     P1
4044 
4045      Process0 [P0] rows_owned=[0,1]
4046         i =  {0,1,3}  [size = nrow+1  = 2+1]
4047         j =  {0,0,2}  [size = 3]
4048         v =  {1,2,3}  [size = 3]
4049 
4050      Process1 [P1] rows_owned=[2]
4051         i =  {0,3}    [size = nrow+1  = 1+1]
4052         j =  {0,1,2}  [size = 3]
4053         v =  {4,5,6}  [size = 3]
4054 .ve
4055 
4056 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4057           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4058 @*/
4059 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4060 {
4061   PetscFunctionBegin;
4062   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4063   PetscFunctionReturn(PETSC_SUCCESS);
4064 }
4065 
4066 /*@
4067   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4068   (the default parallel PETSc format).  For good matrix assembly performance
4069   the user should preallocate the matrix storage by setting the parameters
4070   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4071 
4072   Collective
4073 
4074   Input Parameters:
4075 + B     - the matrix
4076 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4077            (same value is used for all local rows)
4078 . d_nnz - array containing the number of nonzeros in the various rows of the
4079            DIAGONAL portion of the local submatrix (possibly different for each row)
4080            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4081            The size of this array is equal to the number of local rows, i.e 'm'.
4082            For matrices that will be factored, you must leave room for (and set)
4083            the diagonal entry even if it is zero.
4084 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4085            submatrix (same value is used for all local rows).
4086 - o_nnz - array containing the number of nonzeros in the various rows of the
4087            OFF-DIAGONAL portion of the local submatrix (possibly different for
4088            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4089            structure. The size of this array is equal to the number
4090            of local rows, i.e 'm'.
4091 
4092   Example Usage:
4093   Consider the following 8x8 matrix with 34 non-zero values, that is
4094   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4095   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4096   as follows
4097 
4098 .vb
4099             1  2  0  |  0  3  0  |  0  4
4100     Proc0   0  5  6  |  7  0  0  |  8  0
4101             9  0 10  | 11  0  0  | 12  0
4102     -------------------------------------
4103            13  0 14  | 15 16 17  |  0  0
4104     Proc1   0 18  0  | 19 20 21  |  0  0
4105             0  0  0  | 22 23  0  | 24  0
4106     -------------------------------------
4107     Proc2  25 26 27  |  0  0 28  | 29  0
4108            30  0  0  | 31 32 33  |  0 34
4109 .ve
4110 
4111   This can be represented as a collection of submatrices as
4112 .vb
4113       A B C
4114       D E F
4115       G H I
4116 .ve
4117 
4118   Where the submatrices A,B,C are owned by proc0, D,E,F are
4119   owned by proc1, G,H,I are owned by proc2.
4120 
4121   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4122   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4123   The 'M','N' parameters are 8,8, and have the same values on all procs.
4124 
4125   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4126   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4127   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4128   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4129   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4130   matrix, and [DF] as another `MATSEQAIJ` matrix.
4131 
4132   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4133   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4134   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4135   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4136   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4137   In this case, the values of `d_nz`, `o_nz` are
4138 .vb
4139      proc0  dnz = 2, o_nz = 2
4140      proc1  dnz = 3, o_nz = 2
4141      proc2  dnz = 1, o_nz = 4
4142 .ve
4143   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4144   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
4146   34 values.
4147 
4148   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4149   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4150   In the above case the values for `d_nnz`, `o_nnz` are
4151 .vb
4152      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4153      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4154      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4155 .ve
4156   Here the space allocated is sum of all the above values i.e 34, and
4157   hence pre-allocation is perfect.
4158 
4159   Level: intermediate
4160 
4161   Notes:
4162   If the *_nnz parameter is given then the *_nz parameter is ignored
4163 
4164   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4165   storage.  The stored row and column indices begin with zero.
4166   See [Sparse Matrices](sec_matsparse) for details.
4167 
4168   The parallel matrix is partitioned such that the first m0 rows belong to
4169   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4170   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4171 
4172   The DIAGONAL portion of the local submatrix of a processor can be defined
4173   as the submatrix which is obtained by extraction the part corresponding to
4174   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4175   first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
4177   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4178   common case of a square matrix, the row and column ranges are the same and
4179   the DIAGONAL part is also square. The remaining portion of the local
4180   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4181 
4182   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4183 
4184   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4185   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4186   You can also run with the option `-info` and look for messages with the string
4187   malloc in them to see if additional memory allocation was needed.
4188 
4189 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4190           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4191 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* Dispatch to the type-specific implementation; a no-op if the matrix type provides none */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4200 
4201 /*@
4202   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4203   CSR format for the local rows.
4204 
4205   Collective
4206 
4207   Input Parameters:
4208 + comm - MPI communicator
4209 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4210 . n    - This value should be the same as the local size used in creating the
4211          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4212          calculated if `N` is given) For square matrices n is almost always `m`.
4213 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4214 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4215 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4216 . j    - global column indices
4217 - a    - optional matrix values
4218 
4219   Output Parameter:
4220 . mat - the matrix
4221 
4222   Level: intermediate
4223 
4224   Notes:
4225   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4226   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4227   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4228 
4229   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4230 
4231   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4232 
  If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4235 
4236   The format which is used for the sparse matrix input, is equivalent to a
4237   row-major ordering, i.e., for the following matrix, the input data expected is
4238   as shown
4239 .vb
4240         1 0 0
4241         2 0 3     P0
4242        -------
4243         4 5 6     P1
4244 
4245      Process0 [P0] rows_owned=[0,1]
4246         i =  {0,1,3}  [size = nrow+1  = 2+1]
4247         j =  {0,0,2}  [size = 3]
4248         v =  {1,2,3}  [size = 3]
4249 
4250      Process1 [P1] rows_owned=[2]
4251         i =  {0,3}    [size = nrow+1  = 1+1]
4252         j =  {0,1,2}  [size = 3]
4253         v =  {4,5,6}  [size = 3]
4254 .ve
4255 
4256 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4257           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4258 @*/
4259 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4260 {
4261   PetscFunctionBegin;
4262   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4263   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4264   PetscCall(MatCreate(comm, mat));
4265   PetscCall(MatSetSizes(*mat, m, n, M, N));
4266   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4267   PetscCall(MatSetType(*mat, MATMPIAIJ));
4268   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4269   PetscFunctionReturn(PETSC_SUCCESS);
4270 }
4271 
4272 /*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain, in standard
  CSR format, the values for the local rows. Only the numerical values are updated; the other arrays must be identical to what was passed
  from `MatCreateMPIAIJWithArrays()`
4276 
4277   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4278 
4279   Collective
4280 
4281   Input Parameters:
4282 + mat - the matrix
4283 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4284 . n   - This value should be the same as the local size used in creating the
4285        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4286        calculated if N is given) For square matrices n is almost always m.
4287 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4288 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4289 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4290 . J   - column indices
4291 - v   - matrix values
4292 
4293   Level: deprecated
4294 
4295 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4296           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4297 @*/
/* Replaces only the numerical values of an assembled MATMPIAIJ matrix from a full local CSR
   value array v; the CSR structure (Ii, J) must match what the matrix was created with and
   each row's columns must be sorted (checked in debug builds). */
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;     /* value arrays of the diagonal (A) and off-diagonal (B) blocks */
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i; /* row offsets of the diagonal block */
  PetscInt       *ld  = Aij->ld; /* ld[i]: number of leading entries of sorted row i that fall in the off-diagonal block (columns left of the diagonal block) */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      /* debug builds verify each row's columns are strictly increasing: sorted and duplicate-free */
      for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
        PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
        PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
      }
    }
    nnz = Ii[i + 1] - Ii[i];   /* total entries in input row i */
    Iii = Ii[i];               /* offset of row i inside v */
    ldi = ld[i];               /* entries preceding the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries inside the diagonal block */
    /* a sorted row splits as [off-diag left | diagonal block | off-diag right]; scatter the three pieces */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;       /* advance write cursors past this row's portion of each block */
    ao += nnz - md;
  }
  /* only local values were written, so temporarily claim there are no off-process entries;
     assembly can then skip the off-process value exchange */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  /* NOTE(review): ad/ao have been advanced past the arrays at this point; the restore calls
     appear not to depend on the pointer values — confirm against MatSeqAIJRestoreArrayWrite() */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  /* bump object states so cached data derived from the values is invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4347 
4348 /*@
4349   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4350 
4351   Collective
4352 
4353   Input Parameters:
4354 + mat - the matrix
4355 - v   - matrix values, stored by row
4356 
4357   Level: intermediate
4358 
4359   Notes:
4360   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4361 
4362   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4363 
4364 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4365           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4366 @*/
/* Replaces only the numerical values of an assembled MATMPIAIJ matrix from a row-ordered value
   array v whose layout matches the (sorted) CSR structure the matrix was created with; the
   structure itself is taken from the stored diagonal/off-diagonal blocks, so no index arrays
   are passed. */
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;                   /* value arrays of the diagonal (A) and off-diagonal (B) blocks */
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* row offsets of the diagonal and (despite the name) off-diagonal blocks */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld; /* ld[i]: number of leading entries of sorted row i that fall in the off-diagonal block */

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset of the current row inside v */
  for (i = 0; i < m; i++) {
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; /* total entries of row i = diag + off-diag counts */
    ldi = ld[i];                                     /* entries preceding the diagonal block */
    md  = Adi[i + 1] - Adi[i];                       /* entries inside the diagonal block */
    /* a sorted row splits as [off-diag left | diagonal block | off-diag right] */
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    ad += md;
    if (ao) { /* off-diagonal block may have no values (e.g. a single-process layout) */
      PetscCall(PetscArraycpy(ao, v + Iii, ldi));
      PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
      ao += nnz - md;
    }
    Iii += nnz;
  }
  /* only local values were written, so temporarily claim there are no off-process entries;
     assembly can then skip the off-process value exchange */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  /* bump object states so cached data derived from the values is invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4410 
4411 /*@
4412   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4413   (the default parallel PETSc format).  For good matrix assembly performance
4414   the user should preallocate the matrix storage by setting the parameters
4415   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4416 
4417   Collective
4418 
4419   Input Parameters:
4420 + comm  - MPI communicator
4421 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4422           This value should be the same as the local size used in creating the
4423           y vector for the matrix-vector product y = Ax.
4424 . n     - This value should be the same as the local size used in creating the
4425           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4426           calculated if N is given) For square matrices n is almost always m.
4427 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4428 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4429 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4430           (same value is used for all local rows)
4431 . d_nnz - array containing the number of nonzeros in the various rows of the
4432           DIAGONAL portion of the local submatrix (possibly different for each row)
4433           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4434           The size of this array is equal to the number of local rows, i.e 'm'.
4435 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4436           submatrix (same value is used for all local rows).
4437 - o_nnz - array containing the number of nonzeros in the various rows of the
4438           OFF-DIAGONAL portion of the local submatrix (possibly different for
4439           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4440           structure. The size of this array is equal to the number
4441           of local rows, i.e 'm'.
4442 
4443   Output Parameter:
4444 . A - the matrix
4445 
4446   Options Database Keys:
4447 + -mat_no_inode                     - Do not use inodes
4448 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4449 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4450                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4451                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4452 
4453   Level: intermediate
4454 
4455   Notes:
4456   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4457   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4458   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4459 
4460   If the *_nnz parameter is given then the *_nz parameter is ignored
4461 
4462   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4463   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4464   storage requirements for this matrix.
4465 
4466   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4467   processor than it must be used on all processors that share the object for
4468   that argument.
4469 
4470   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4471   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4472 
4473   The user MUST specify either the local or global matrix dimensions
4474   (possibly both).
4475 
4476   The parallel matrix is partitioned across processors such that the
4477   first `m0` rows belong to process 0, the next `m1` rows belong to
4478   process 1, the next `m2` rows belong to process 2, etc., where
4479   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4480   values corresponding to [m x N] submatrix.
4481 
4482   The columns are logically partitioned with the n0 columns belonging
4483   to 0th partition, the next n1 columns belonging to the next
4484   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4485 
4486   The DIAGONAL portion of the local submatrix on any given processor
4487   is the submatrix corresponding to the rows and columns m,n
4488   corresponding to the given processor. i.e diagonal matrix on
4489   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4490   etc. The remaining portion of the local submatrix [m x (N-n)]
4491   constitute the OFF-DIAGONAL portion. The example below better
4492   illustrates this concept. The two matrices, the DIAGONAL portion and
4493   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4494 
4495   For a square global matrix we define each processor's diagonal portion
4496   to be its local rows and the corresponding columns (a square submatrix);
4497   each processor's off-diagonal portion encompasses the remainder of the
4498   local matrix (a rectangular submatrix).
4499 
4500   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4501 
4502   When calling this routine with a single process communicator, a matrix of
4503   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4504   type of communicator, use the construction mechanism
4505 .vb
4506   MatCreate(..., &A);
4507   MatSetType(A, MATMPIAIJ);
4508   MatSetSizes(A, m, n, M, N);
4509   MatMPIAIJSetPreallocation(A, ...);
4510 .ve
4511 
4512   By default, this format uses inodes (identical nodes) when possible.
4513   We search for consecutive rows with the same nonzero structure, thereby
4514   reusing matrix information to achieve increased efficiency.
4515 
4516   Example Usage:
4517   Consider the following 8x8 matrix with 34 non-zero values, that is
4518   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4519   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4520   as follows
4521 
4522 .vb
4523             1  2  0  |  0  3  0  |  0  4
4524     Proc0   0  5  6  |  7  0  0  |  8  0
4525             9  0 10  | 11  0  0  | 12  0
4526     -------------------------------------
4527            13  0 14  | 15 16 17  |  0  0
4528     Proc1   0 18  0  | 19 20 21  |  0  0
4529             0  0  0  | 22 23  0  | 24  0
4530     -------------------------------------
4531     Proc2  25 26 27  |  0  0 28  | 29  0
4532            30  0  0  | 31 32 33  |  0 34
4533 .ve
4534 
4535   This can be represented as a collection of submatrices as
4536 
4537 .vb
4538       A B C
4539       D E F
4540       G H I
4541 .ve
4542 
4543   Where the submatrices A,B,C are owned by proc0, D,E,F are
4544   owned by proc1, G,H,I are owned by proc2.
4545 
4546   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4547   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4548   The 'M','N' parameters are 8,8, and have the same values on all procs.
4549 
4550   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4551   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4552   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4553   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4554   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4555   matrix, and [DF] as another SeqAIJ matrix.
4556 
4557   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4558   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4559   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4560   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4561   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4562   In this case, the values of `d_nz`,`o_nz` are
4563 .vb
4564      proc0  dnz = 2, o_nz = 2
4565      proc1  dnz = 3, o_nz = 2
4566      proc2  dnz = 1, o_nz = 4
4567 .ve
4568   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4569   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
4571   34 values.
4572 
4573   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4574   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4575   In the above case the values for d_nnz,o_nnz are
4576 .vb
4577      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4578      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4579      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4580 .ve
4581   Here the space allocated is sum of all the above values i.e 34, and
4582   hence pre-allocation is perfect.
4583 
4584 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4585           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4586           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4587 @*/
4588 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4589 {
4590   PetscMPIInt size;
4591 
4592   PetscFunctionBegin;
4593   PetscCall(MatCreate(comm, A));
4594   PetscCall(MatSetSizes(*A, m, n, M, N));
4595   PetscCallMPI(MPI_Comm_size(comm, &size));
4596   if (size > 1) {
4597     PetscCall(MatSetType(*A, MATMPIAIJ));
4598     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4599   } else {
4600     PetscCall(MatSetType(*A, MATSEQAIJ));
4601     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4602   }
4603   PetscFunctionReturn(PETSC_SUCCESS);
4604 }
4605 
4606 /*@C
4607   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4608 
4609   Not Collective
4610 
4611   Input Parameter:
4612 . A - The `MATMPIAIJ` matrix
4613 
4614   Output Parameters:
4615 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4616 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4617 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4618 
4619   Level: intermediate
4620 
4621   Note:
4622   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
4624   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4625   local column numbers to global column numbers in the original matrix.
4626 
4627 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4628 @*/
4629 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4630 {
4631   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4632   PetscBool   flg;
4633 
4634   PetscFunctionBegin;
4635   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4636   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4637   if (Ad) *Ad = a->A;
4638   if (Ao) *Ao = a->B;
4639   if (colmap) *colmap = a->garray;
4640   PetscFunctionReturn(PETSC_SUCCESS);
4641 }
4642 
/* Stacks the per-process sequential matrices inmat on top of each other (by rank order) into the
   parallel matrix *outmat on comm. n is this process's local column count (or PETSC_DECIDE);
   scall == MAT_INITIAL_MATRIX creates *outmat (symbolic + numeric), MAT_REUSE_MATRIX only
   refills the values of a previously created *outmat. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N)); /* inmat is sequential, so these are this process's row count and the global column count */
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* prefix-sum of local row counts gives this process's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count nonzeros per row, split into diagonal/off-diagonal, to preallocate exactly */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocations are set; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* every value set below is locally owned, so assembly never needs to communicate entries */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart; /* shift the local row index to its global position */
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4694 
/* Container destructor for the Mat_Merge_SeqsToMPI context attached to matrices built by the
   seqs-to-MPI merge routines; releases every buffer the symbolic phase allocated. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS); /* nothing was ever attached */
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj: entry [0] anchors the underlying data allocation, so it must be freed
     BEFORE the pointer array itself — do not reorder these pairs */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge)); /* finally the context struct itself */
  PetscFunctionReturn(PETSC_SUCCESS);
}
4717 
4718 #include <../src/mat/utils/freespace.h>
4719 #include <petscbt.h>
4720 
/* Numeric phase of merging per-process SeqAIJ matrices into one MPIAIJ matrix: using the
   communication structure stored by MatCreateMPIAIJSumSeqAIJSymbolic() in the attached
   Mat_Merge_SeqsToMPI container, it ships each process's non-owned rows to their owners and
   accumulates local plus received values into mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
  PetscMPIInt          proc, k;
  PetscInt           **buf_ri, **buf_rj; /* received i- and j-structures, one per sender */
  PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i; /* received values; accumulator for one merged row */
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge context saved by the symbolic phase; without it this call is invalid */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue; /* nothing owed to this process */
    i = owners[proc];           /* first row owned by proc; its values start at aa + ai[i] */
    PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  /* wait for all value traffic before touching the receive buffers */
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i)); /* large enough for any merged row (at most N columns) */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  /* set up one cursor per received message: buf_ri[k] = [nrows, row indices..., i-offsets...] */
  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global index of this locally owned row */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge-walk: bj_i is a superset of aj, both sorted, so advance j until columns match */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *nextai[k];
        aa     = abuf_r[k] + *nextai[k];
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] anchors the contiguous receive buffer; free it before the pointer array */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4839 
4840 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4841 {
4842   Mat                  B_mpi;
4843   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4844   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4845   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4846   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4847   PetscInt             len, *dnz, *onz, bs, cbs;
4848   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4849   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4850   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4851   MPI_Status          *status;
4852   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4853   PetscBT              lnkbt;
4854   Mat_Merge_SeqsToMPI *merge;
4855   PetscContainer       container;
4856 
4857   PetscFunctionBegin;
4858   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4859 
4860   /* make sure it is a PETSc comm */
4861   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4862   PetscCallMPI(MPI_Comm_size(comm, &size));
4863   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4864 
4865   PetscCall(PetscNew(&merge));
4866   PetscCall(PetscMalloc1(size, &status));
4867 
4868   /* determine row ownership */
4869   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4870   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4871   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4872   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4873   PetscCall(PetscLayoutSetUp(merge->rowmap));
4874   PetscCall(PetscMalloc1(size, &len_si));
4875   PetscCall(PetscMalloc1(size, &merge->len_s));
4876 
4877   m      = merge->rowmap->n;
4878   owners = merge->rowmap->range;
4879 
4880   /* determine the number of messages to send, their lengths */
4881   len_s = merge->len_s;
4882 
4883   len          = 0; /* length of buf_si[] */
4884   merge->nsend = 0;
4885   for (PetscMPIInt proc = 0; proc < size; proc++) {
4886     len_si[proc] = 0;
4887     if (proc == rank) {
4888       len_s[proc] = 0;
4889     } else {
4890       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4891       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4892     }
4893     if (len_s[proc]) {
4894       merge->nsend++;
4895       nrows = 0;
4896       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4897         if (ai[i + 1] > ai[i]) nrows++;
4898       }
4899       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4900       len += len_si[proc];
4901     }
4902   }
4903 
4904   /* determine the number and length of messages to receive for ij-structure */
4905   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4906   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4907 
4908   /* post the Irecv of j-structure */
4909   PetscCall(PetscCommGetNewTag(comm, &tagj));
4910   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4911 
4912   /* post the Isend of j-structure */
4913   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4914 
4915   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4916     if (!len_s[proc]) continue;
4917     i = owners[proc];
4918     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4919     k++;
4920   }
4921 
4922   /* receives and sends of j-structure are complete */
4923   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4924   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4925 
4926   /* send and recv i-structure */
4927   PetscCall(PetscCommGetNewTag(comm, &tagi));
4928   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4929 
4930   PetscCall(PetscMalloc1(len + 1, &buf_s));
4931   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4932   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4933     if (!len_s[proc]) continue;
4934     /* form outgoing message for i-structure:
4935          buf_si[0]:                 nrows to be sent
4936                [1:nrows]:           row index (global)
4937                [nrows+1:2*nrows+1]: i-structure index
4938     */
4939     nrows       = len_si[proc] / 2 - 1;
4940     buf_si_i    = buf_si + nrows + 1;
4941     buf_si[0]   = nrows;
4942     buf_si_i[0] = 0;
4943     nrows       = 0;
4944     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4945       anzi = ai[i + 1] - ai[i];
4946       if (anzi) {
4947         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4948         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4949         nrows++;
4950       }
4951     }
4952     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4953     k++;
4954     buf_si += len_si[proc];
4955   }
4956 
4957   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4958   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4959 
4960   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4961   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4962 
4963   PetscCall(PetscFree(len_si));
4964   PetscCall(PetscFree(len_ri));
4965   PetscCall(PetscFree(rj_waits));
4966   PetscCall(PetscFree2(si_waits, sj_waits));
4967   PetscCall(PetscFree(ri_waits));
4968   PetscCall(PetscFree(buf_s));
4969   PetscCall(PetscFree(status));
4970 
4971   /* compute a local seq matrix in each processor */
4972   /* allocate bi array and free space for accumulating nonzero column info */
4973   PetscCall(PetscMalloc1(m + 1, &bi));
4974   bi[0] = 0;
4975 
4976   /* create and initialize a linked list */
4977   nlnk = N + 1;
4978   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4979 
4980   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4981   len = ai[owners[rank + 1]] - ai[owners[rank]];
4982   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4983 
4984   current_space = free_space;
4985 
4986   /* determine symbolic info for each local row */
4987   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4988 
4989   for (k = 0; k < merge->nrecv; k++) {
4990     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4991     nrows       = *buf_ri_k[k];
4992     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4993     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4994   }
4995 
4996   MatPreallocateBegin(comm, m, n, dnz, onz);
4997   len = 0;
4998   for (i = 0; i < m; i++) {
4999     bnzi = 0;
5000     /* add local non-zero cols of this proc's seqmat into lnk */
5001     arow = owners[rank] + i;
5002     anzi = ai[arow + 1] - ai[arow];
5003     aj   = a->j + ai[arow];
5004     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5005     bnzi += nlnk;
5006     /* add received col data into lnk */
5007     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5008       if (i == *nextrow[k]) {            /* i-th row */
5009         anzi = *(nextai[k] + 1) - *nextai[k];
5010         aj   = buf_rj[k] + *nextai[k];
5011         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5012         bnzi += nlnk;
5013         nextrow[k]++;
5014         nextai[k]++;
5015       }
5016     }
5017     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5018 
5019     /* if free space is not available, make more free space */
5020     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5021     /* copy data into free space, then initialize lnk */
5022     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5023     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5024 
5025     current_space->array += bnzi;
5026     current_space->local_used += bnzi;
5027     current_space->local_remaining -= bnzi;
5028 
5029     bi[i + 1] = bi[i] + bnzi;
5030   }
5031 
5032   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5033 
5034   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5035   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5036   PetscCall(PetscLLDestroy(lnk, lnkbt));
5037 
5038   /* create symbolic parallel matrix B_mpi */
5039   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5040   PetscCall(MatCreate(comm, &B_mpi));
5041   if (n == PETSC_DECIDE) {
5042     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5043   } else {
5044     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5045   }
5046   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5047   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5048   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5049   MatPreallocateEnd(dnz, onz);
5050   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5051 
5052   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5053   B_mpi->assembled = PETSC_FALSE;
5054   merge->bi        = bi;
5055   merge->bj        = bj;
5056   merge->buf_ri    = buf_ri;
5057   merge->buf_rj    = buf_rj;
5058   merge->coi       = NULL;
5059   merge->coj       = NULL;
5060   merge->owners_co = NULL;
5061 
5062   PetscCall(PetscCommDestroy(&comm));
5063 
5064   /* attach the supporting struct to B_mpi for reuse */
5065   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5066   PetscCall(PetscContainerSetPointer(container, merge));
5067   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5068   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5069   PetscCall(PetscContainerDestroy(&container));
5070   *mpimat = B_mpi;
5071 
5072   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5073   PetscFunctionReturn(PETSC_SUCCESS);
5074 }
5075 
5076 /*@
5077   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5078   matrices from each processor
5079 
5080   Collective
5081 
5082   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix on each MPI process
5085 . m      - number of local rows (or `PETSC_DECIDE`)
5086 . n      - number of local columns (or `PETSC_DECIDE`)
5087 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5088 
5089   Output Parameter:
5090 . mpimat - the parallel matrix generated
5091 
5092   Level: advanced
5093 
5094   Note:
5095   The dimensions of the sequential matrix in each processor MUST be the same.
5096   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5097   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5098 
5099 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5100 @*/
5101 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5102 {
5103   PetscMPIInt size;
5104 
5105   PetscFunctionBegin;
5106   PetscCallMPI(MPI_Comm_size(comm, &size));
5107   if (size == 1) {
5108     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5109     if (scall == MAT_INITIAL_MATRIX) {
5110       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5111     } else {
5112       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5113     }
5114     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5115     PetscFunctionReturn(PETSC_SUCCESS);
5116   }
5117   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5118   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5119   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5120   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5121   PetscFunctionReturn(PETSC_SUCCESS);
5122 }
5123 
5124 /*@
5125   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5126 
5127   Not Collective
5128 
5129   Input Parameter:
5130 . A - the matrix
5131 
5132   Output Parameter:
5133 . A_loc - the local sequential matrix generated
5134 
5135   Level: developer
5136 
5137   Notes:
5138   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5139   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5140   `n` is the global column count obtained with `MatGetSize()`
5141 
5142   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5143 
5144   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5145 
5146   Destroy the matrix with `MatDestroy()`
5147 
5148 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5149 @*/
5150 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5151 {
5152   PetscBool mpi;
5153 
5154   PetscFunctionBegin;
5155   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5156   if (mpi) {
5157     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5158   } else {
5159     *A_loc = A;
5160     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5161   }
5162   PetscFunctionReturn(PETSC_SUCCESS);
5163 }
5164 
5165 /*@
5166   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5167 
5168   Not Collective
5169 
5170   Input Parameters:
5171 + A     - the matrix
5172 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5173 
5174   Output Parameter:
5175 . A_loc - the local sequential matrix generated
5176 
5177   Level: developer
5178 
5179   Notes:
  The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5183 
5184   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5185 
5186   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5187   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5188   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5189   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5190 
5191 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5192 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap[]: local off-diagonal column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;                       /* aa/ba advance through the values; aav/bav keep the base pointers for the Restore calls */
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and derived types whose type name begins with "mpiaij" */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single process: the diagonal block already holds the entire matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result has all diagonal-block plus all off-diagonal-block entries of row i */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* Merge each row in ascending global column order: off-diagonal entries with
       global column < cstart, then the diagonal block, then the remaining off-diagonal
       entries.  aj/bj/aa/ba advance monotonically; each row consumes exactly its
       ncols_d/ncols_o entries, so no per-row reset is needed. */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (global columns left of the diagonal block) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (local columns shifted by cstart to global) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (global columns right of the diagonal block) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists; only the values are rewritten, in the same merge order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    /* NOTE(review): cam has been advanced past the array start here; Restore for plain
       SeqAIJ ignores the pointer value, but confirm this is safe for device subtypes */
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5297 
5298 /*@
5299   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5300   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5301 
5302   Not Collective
5303 
5304   Input Parameters:
5305 + A     - the matrix
5306 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5307 
5308   Output Parameters:
5309 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5310 - A_loc - the local sequential matrix generated
5311 
5312   Level: developer
5313 
5314   Note:
5315   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5316   part, then those associated with the off-diagonal part (in its local ordering)
5317 
5318 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5319 @*/
5320 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5321 {
5322   Mat             Ao, Ad;
5323   const PetscInt *cmap;
5324   PetscMPIInt     size;
5325   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5326 
5327   PetscFunctionBegin;
5328   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5329   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5330   if (size == 1) {
5331     if (scall == MAT_INITIAL_MATRIX) {
5332       PetscCall(PetscObjectReference((PetscObject)Ad));
5333       *A_loc = Ad;
5334     } else if (scall == MAT_REUSE_MATRIX) {
5335       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5336     }
5337     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5338     PetscFunctionReturn(PETSC_SUCCESS);
5339   }
5340   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5341   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5342   if (f) {
5343     PetscCall((*f)(A, scall, glob, A_loc));
5344   } else {
5345     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5346     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5347     Mat_SeqAIJ        *c;
5348     PetscInt          *ai = a->i, *aj = a->j;
5349     PetscInt          *bi = b->i, *bj = b->j;
5350     PetscInt          *ci, *cj;
5351     const PetscScalar *aa, *ba;
5352     PetscScalar       *ca;
5353     PetscInt           i, j, am, dn, on;
5354 
5355     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5356     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5357     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5358     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5359     if (scall == MAT_INITIAL_MATRIX) {
5360       PetscInt k;
5361       PetscCall(PetscMalloc1(1 + am, &ci));
5362       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5363       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5364       ci[0] = 0;
5365       for (i = 0, k = 0; i < am; i++) {
5366         const PetscInt ncols_o = bi[i + 1] - bi[i];
5367         const PetscInt ncols_d = ai[i + 1] - ai[i];
5368         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5369         /* diagonal portion of A */
5370         for (j = 0; j < ncols_d; j++, k++) {
5371           cj[k] = *aj++;
5372           ca[k] = *aa++;
5373         }
5374         /* off-diagonal portion of A */
5375         for (j = 0; j < ncols_o; j++, k++) {
5376           cj[k] = dn + *bj++;
5377           ca[k] = *ba++;
5378         }
5379       }
5380       /* put together the new matrix */
5381       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5382       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5383       /* Since these are PETSc arrays, change flags to free them as necessary. */
5384       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5385       c->free_a  = PETSC_TRUE;
5386       c->free_ij = PETSC_TRUE;
5387       c->nonew   = 0;
5388       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5389     } else if (scall == MAT_REUSE_MATRIX) {
5390       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5391       for (i = 0; i < am; i++) {
5392         const PetscInt ncols_d = ai[i + 1] - ai[i];
5393         const PetscInt ncols_o = bi[i + 1] - bi[i];
5394         /* diagonal portion of A */
5395         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5396         /* off-diagonal portion of A */
5397         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5398       }
5399       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5400     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5401     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5402     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5403     if (glob) {
5404       PetscInt cst, *gidx;
5405 
5406       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5407       PetscCall(PetscMalloc1(dn + on, &gidx));
5408       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5409       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5410       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5411     }
5412   }
5413   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5414   PetscFunctionReturn(PETSC_SUCCESS);
5415 }
5416 
5417 /*@C
5418   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5419 
5420   Not Collective
5421 
5422   Input Parameters:
5423 + A     - the matrix
5424 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5425 . row   - index set of rows to extract (or `NULL`)
5426 - col   - index set of columns to extract (or `NULL`)
5427 
5428   Output Parameter:
5429 . A_loc - the local sequential matrix generated
5430 
5431   Level: developer
5432 
5433 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5434 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the columns with local nonzeros, in ascending global order;
       garray is sorted, so off-diagonal columns < cstart come first, then the owned
       columns, then the remaining off-diagonal columns */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first off-diagonal column at or beyond the owned range */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* only destroy the index sets this routine created; caller-supplied ones are untouched */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5487 
/*
 * Create a sequential AIJ matrix based on row indices; a whole column is extracted once a row is matched.
 * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots (owned rows of P)
   * nrows is the number of leaves (requested rows)
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-row counts (nrcols) and prefix-sum offsets (roffsets), interleaved as
     [diag, off-diag] pairs so both can travel in one MPIU_2INT broadcast */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute prefix-sum offsets giving the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* Build entry-level SF graphs: one leaf per received nonzero.
     diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory;
     note P's own pd->j/po->j are temporarily modified below and restored before return */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* undo the local-to-global conversion of po->j so P is left unchanged */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5661 
5662 /*
5663  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5664  * This supports MPIAIJ and MAIJ
5665  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map; /* rows: unique (dof-collapsed) off-diag columns of A; map: off-diag column -> row of *P_oth */
  PetscHMapI  hamp;      /* hash map used to deduplicate the keys garray[i]/dof */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf; /* SFs composed on *P_oth by MatCreateSeqSubMatrixWithRows_Private() for value updates */
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    /* mapping[] ownership is transferred to the IS 'map' below via PETSC_OWN_POINTER */
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that  a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      /* each block of 'dof' consecutive global columns collapses to one key,
         i.e. one row of P per dof columns of A -- assumes dof >= 1 */
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step; valid only because garray[] is sorted,
           so a repeated key is always the one just inserted (index count-1) */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    /* Extract the unique keys, sort them, and use them as the global row indices of P to fetch */
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    /* Save the column->row map on P_oth so later products can translate indices without rebuilding it */
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that as attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place: broadcast P's diag (p->A) and offdiag (p->B) values
       into p_oth->a; both SFs target disjoint slots of the same array */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5738 
5739 /*@C
5740   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5741 
5742   Collective
5743 
5744   Input Parameters:
5745 + A     - the first matrix in `MATMPIAIJ` format
5746 . B     - the second matrix in `MATMPIAIJ` format
5747 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5748 
5749   Output Parameters:
5750 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5751 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5752 - B_seq - the sequential matrix generated
5753 
5754   Level: developer
5755 
5756 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5757 @*/
5758 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5759 {
5760   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5761   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5762   IS          isrowb, iscolb;
5763   Mat        *bseq = NULL;
5764 
5765   PetscFunctionBegin;
5766   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5767              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5768   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5769 
5770   if (scall == MAT_INITIAL_MATRIX) {
5771     start = A->cmap->rstart;
5772     cmap  = a->garray;
5773     nzA   = a->A->cmap->n;
5774     nzB   = a->B->cmap->n;
5775     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5776     ncols = 0;
5777     for (i = 0; i < nzB; i++) { /* row < local row index */
5778       if (cmap[i] < start) idx[ncols++] = cmap[i];
5779       else break;
5780     }
5781     imark = i;
5782     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5783     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5784     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5785     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5786   } else {
5787     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5788     isrowb = *rowb;
5789     iscolb = *colb;
5790     PetscCall(PetscMalloc1(1, &bseq));
5791     bseq[0] = *B_seq;
5792   }
5793   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5794   *B_seq = bseq[0];
5795   PetscCall(PetscFree(bseq));
5796   if (!rowb) {
5797     PetscCall(ISDestroy(&isrowb));
5798   } else {
5799     *rowb = isrowb;
5800   }
5801   if (!colb) {
5802     PetscCall(ISDestroy(&iscolb));
5803   } else {
5804     *colb = iscolb;
5805   }
5806   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5807   PetscFunctionReturn(PETSC_SUCCESS);
5808 }
5809 
5810 /*
5811     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5812     of the OFF-DIAGONAL portion of local A
5813 
5814     Collective
5815 
5816    Input Parameters:
5817 +    A,B - the matrices in `MATMPIAIJ` format
5818 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5819 
5820    Output Parameter:
5821 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5822 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5823 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5824 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5825 
5826     Developer Note:
5827     This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5829 
5830     Level: developer
5831 
5832 */
5833 
5834 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5835 {
5836   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5837   VecScatter         ctx;
5838   MPI_Comm           comm;
5839   const PetscMPIInt *rprocs, *sprocs;
5840   PetscMPIInt        nrecvs, nsends;
5841   const PetscInt    *srow, *rstarts, *sstarts;
5842   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5843   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5844   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5845   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5846   PetscMPIInt        size, tag, rank, nreqs;
5847 
5848   PetscFunctionBegin;
5849   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5850   PetscCallMPI(MPI_Comm_size(comm, &size));
5851 
5852   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5853              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5854   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5855   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5856 
5857   if (size == 1) {
5858     startsj_s = NULL;
5859     bufa_ptr  = NULL;
5860     *B_oth    = NULL;
5861     PetscFunctionReturn(PETSC_SUCCESS);
5862   }
5863 
5864   ctx = a->Mvctx;
5865   tag = ((PetscObject)ctx)->tag;
5866 
5867   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5868   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5869   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5870   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5871   PetscCall(PetscMalloc1(nreqs, &reqs));
5872   rwaits = reqs;
5873   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5874 
5875   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5876   if (scall == MAT_INITIAL_MATRIX) {
5877     /* i-array */
5878     /*  post receives */
5879     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5880     for (i = 0; i < nrecvs; i++) {
5881       rowlen = rvalues + rstarts[i] * rbs;
5882       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5883       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5884     }
5885 
5886     /* pack the outgoing message */
5887     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5888 
5889     sstartsj[0] = 0;
5890     rstartsj[0] = 0;
5891     len         = 0; /* total length of j or a array to be sent */
5892     if (nsends) {
5893       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5894       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5895     }
5896     for (i = 0; i < nsends; i++) {
5897       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5898       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5899       for (j = 0; j < nrows; j++) {
5900         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5901         for (l = 0; l < sbs; l++) {
5902           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5903 
5904           rowlen[j * sbs + l] = ncols;
5905 
5906           len += ncols;
5907           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5908         }
5909         k++;
5910       }
5911       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5912 
5913       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5914     }
5915     /* recvs and sends of i-array are completed */
5916     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5917     PetscCall(PetscFree(svalues));
5918 
5919     /* allocate buffers for sending j and a arrays */
5920     PetscCall(PetscMalloc1(len + 1, &bufj));
5921     PetscCall(PetscMalloc1(len + 1, &bufa));
5922 
5923     /* create i-array of B_oth */
5924     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5925 
5926     b_othi[0] = 0;
5927     len       = 0; /* total length of j or a array to be received */
5928     k         = 0;
5929     for (i = 0; i < nrecvs; i++) {
5930       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5931       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5932       for (j = 0; j < nrows; j++) {
5933         b_othi[k + 1] = b_othi[k] + rowlen[j];
5934         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5935         k++;
5936       }
5937       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5938     }
5939     PetscCall(PetscFree(rvalues));
5940 
5941     /* allocate space for j and a arrays of B_oth */
5942     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5943     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5944 
5945     /* j-array */
5946     /*  post receives of j-array */
5947     for (i = 0; i < nrecvs; i++) {
5948       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5949       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5950     }
5951 
5952     /* pack the outgoing message j-array */
5953     if (nsends) k = sstarts[0];
5954     for (i = 0; i < nsends; i++) {
5955       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5956       bufJ  = bufj + sstartsj[i];
5957       for (j = 0; j < nrows; j++) {
5958         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5959         for (ll = 0; ll < sbs; ll++) {
5960           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5961           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5962           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5963         }
5964       }
5965       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5966     }
5967 
5968     /* recvs and sends of j-array are completed */
5969     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5970   } else if (scall == MAT_REUSE_MATRIX) {
5971     sstartsj = *startsj_s;
5972     rstartsj = *startsj_r;
5973     bufa     = *bufa_ptr;
5974     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5975   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5976 
5977   /* a-array */
5978   /*  post receives of a-array */
5979   for (i = 0; i < nrecvs; i++) {
5980     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5981     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5982   }
5983 
5984   /* pack the outgoing message a-array */
5985   if (nsends) k = sstarts[0];
5986   for (i = 0; i < nsends; i++) {
5987     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5988     bufA  = bufa + sstartsj[i];
5989     for (j = 0; j < nrows; j++) {
5990       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5991       for (ll = 0; ll < sbs; ll++) {
5992         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5993         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5994         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5995       }
5996     }
5997     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5998   }
5999   /* recvs and sends of a-array are completed */
6000   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6001   PetscCall(PetscFree(reqs));
6002 
6003   if (scall == MAT_INITIAL_MATRIX) {
6004     Mat_SeqAIJ *b_oth;
6005 
6006     /* put together the new matrix */
6007     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6008 
6009     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6010     /* Since these are PETSc arrays, change flags to free them as necessary. */
6011     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6012     b_oth->free_a  = PETSC_TRUE;
6013     b_oth->free_ij = PETSC_TRUE;
6014     b_oth->nonew   = 0;
6015 
6016     PetscCall(PetscFree(bufj));
6017     if (!startsj_s || !bufa_ptr) {
6018       PetscCall(PetscFree2(sstartsj, rstartsj));
6019       PetscCall(PetscFree(bufa_ptr));
6020     } else {
6021       *startsj_s = sstartsj;
6022       *startsj_r = rstartsj;
6023       *bufa_ptr  = bufa;
6024     }
6025   } else if (scall == MAT_REUSE_MATRIX) {
6026     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6027   }
6028 
6029   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6030   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6031   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6032   PetscFunctionReturn(PETSC_SUCCESS);
6033 }
6034 
6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6038 #if defined(PETSC_HAVE_MKL_SPARSE)
6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6040 #endif
6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6042 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6043 #if defined(PETSC_HAVE_ELEMENTAL)
6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6045 #endif
6046 #if defined(PETSC_HAVE_SCALAPACK)
6047 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6048 #endif
6049 #if defined(PETSC_HAVE_HYPRE)
6050 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6051 #endif
6052 #if defined(PETSC_HAVE_CUDA)
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6054 #endif
6055 #if defined(PETSC_HAVE_HIP)
6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6057 #endif
6058 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6060 #endif
6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6062 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6063 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6064 
6065 /*
6066     Computes (B'*A')' since computing B*A directly is untenable
6067 
6068                n                       p                          p
6069         [             ]       [             ]         [                 ]
6070       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6071         [             ]       [             ]         [                 ]
6072 
6073 */
6074 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6075 {
6076   Mat At, Bt, Ct;
6077 
6078   PetscFunctionBegin;
6079   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6080   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6081   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6082   PetscCall(MatDestroy(&At));
6083   PetscCall(MatDestroy(&Bt));
6084   PetscCall(MatTransposeSetPrecursor(Ct, C));
6085   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6086   PetscCall(MatDestroy(&Ct));
6087   PetscFunctionReturn(PETSC_SUCCESS);
6088 }
6089 
6090 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6091 {
6092   PetscBool cisdense;
6093 
6094   PetscFunctionBegin;
6095   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6096   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6097   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6098   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6099   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6100   PetscCall(MatSetUp(C));
6101 
6102   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6103   PetscFunctionReturn(PETSC_SUCCESS);
6104 }
6105 
6106 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6107 {
6108   Mat_Product *product = C->product;
6109   Mat          A = product->A, B = product->B;
6110 
6111   PetscFunctionBegin;
6112   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6113              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6114   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6115   C->ops->productsymbolic = MatProductSymbolic_AB;
6116   PetscFunctionReturn(PETSC_SUCCESS);
6117 }
6118 
6119 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6120 {
6121   Mat_Product *product = C->product;
6122 
6123   PetscFunctionBegin;
6124   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6125   PetscFunctionReturn(PETSC_SUCCESS);
6126 }
6127 
6128 /*
6129    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6130 
6131   Input Parameters:
6132 
6133     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6134     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6135 
6136     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6137 
6138     For Set1, j1[] contains column indices of the nonzeros.
6139     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6141     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6142 
6143     Similar for Set2.
6144 
6145     This routine merges the two sets of nonzeros row by row and removes repeats.
6146 
6147   Output Parameters: (memory is allocated by the caller)
6148 
6149     i[],j[]: the CSR of the merged matrix, which has m rows.
6150     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6151     imap2[]: similar to imap1[], but for Set2.
6152     Note we order nonzeros row-by-row and from left to right.
6153 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge of the two sorted ranges; jmap1/jmap2 give the
       repeat count of each unique entry, so b1/b2 jump over all duplicates at once */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump over repeats to next unique nonzero of Set1 */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump over repeats to next unique nonzero of Set2 */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Set1's entry is smaller: take it alone */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Set2's entry is smaller: take it alone */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    /* i[r+1] = running count of unique merged nonzeros, i.e. the CSR row pointer */
    PetscCall(PetscIntCast(t, i + r + 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6211 
6212 /*
6213   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6214 
6215   Input Parameters:
6216     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6217     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6218       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6219 
6220       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6221       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6222 
6223   Output Parameters:
6224     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6225     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6226       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6227       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6228 
6229     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6230       Atot: number of entries belonging to the diagonal block.
6231       Annz: number of unique nonzeros belonging to the diagonal block.
6232       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6233         repeats (i.e., same 'i,j' pair).
6234       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6235         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6236 
6237       Atot: number of entries belonging to the diagonal block
6238       Annz: number of unique nonzeros belonging to the diagonal block.
6239 
6240     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6241 
6242     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6243 */
6244 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6245 {
6246   PetscInt    cstart, cend, rstart, rend, row, col;
6247   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6248   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6249   PetscCount  k, m, p, q, r, s, mid;
6250   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6251 
6252   PetscFunctionBegin;
6253   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6254   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6255   m = rend - rstart;
6256 
6257   /* Skip negative rows */
6258   for (k = 0; k < n; k++)
6259     if (i[k] >= 0) break;
6260 
6261   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6262      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6263   */
6264   while (k < n) {
6265     row = i[k];
6266     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6267     for (s = k; s < n; s++)
6268       if (i[s] != row) break;
6269 
6270     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6271     for (p = k; p < s; p++) {
6272       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6273       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6274     }
6275     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6276     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6277     rowBegin[row - rstart] = k;
6278     rowMid[row - rstart]   = mid;
6279     rowEnd[row - rstart]   = s;
6280 
6281     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6282     Atot += mid - k;
6283     Btot += s - mid;
6284 
6285     /* Count unique nonzeros of this diag row */
6286     for (p = k; p < mid;) {
6287       col = j[p];
6288       do {
6289         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6290         p++;
6291       } while (p < mid && j[p] == col);
6292       Annz++;
6293     }
6294 
6295     /* Count unique nonzeros of this offdiag row */
6296     for (p = mid; p < s;) {
6297       col = j[p];
6298       do {
6299         p++;
6300       } while (p < s && j[p] == col);
6301       Bnnz++;
6302     }
6303     k = s;
6304   }
6305 
6306   /* Allocation according to Atot, Btot, Annz, Bnnz */
6307   PetscCall(PetscMalloc1(Atot, &Aperm));
6308   PetscCall(PetscMalloc1(Btot, &Bperm));
6309   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6310   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6311 
6312   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6313   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6314   for (r = 0; r < m; r++) {
6315     k   = rowBegin[r];
6316     mid = rowMid[r];
6317     s   = rowEnd[r];
6318     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6319     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6320     Atot += mid - k;
6321     Btot += s - mid;
6322 
6323     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6324     for (p = k; p < mid;) {
6325       col = j[p];
6326       q   = p;
6327       do {
6328         p++;
6329       } while (p < mid && j[p] == col);
6330       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6331       Annz++;
6332     }
6333 
6334     for (p = mid; p < s;) {
6335       col = j[p];
6336       q   = p;
6337       do {
6338         p++;
6339       } while (p < s && j[p] == col);
6340       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6341       Bnnz++;
6342     }
6343   }
6344   /* Output */
6345   *Aperm_ = Aperm;
6346   *Annz_  = Annz;
6347   *Atot_  = Atot;
6348   *Ajmap_ = Ajmap;
6349   *Bperm_ = Bperm;
6350   *Bnnz_  = Bnnz;
6351   *Btot_  = Btot;
6352   *Bjmap_ = Bjmap;
6353   PetscFunctionReturn(PETSC_SUCCESS);
6354 }
6355 
6356 /*
6357   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6358 
6359   Input Parameters:
6360     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6361     nnz:  number of unique nonzeros in the merged matrix
6362     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6363     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6364 
6365   Output Parameter: (memory is allocated by the caller)
6366     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6367 
6368   Example:
6369     nnz1 = 4
6370     nnz  = 6
6371     imap = [1,3,4,5]
6372     jmap = [0,3,5,6,7]
6373    then,
6374     jmap_new = [0,0,3,3,5,6,7]
6375 */
6376 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6377 {
6378   PetscCount k, p;
6379 
6380   PetscFunctionBegin;
6381   jmap_new[0] = 0;
6382   p           = nnz;                /* p loops over jmap_new[] backwards */
6383   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6384     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6385   }
6386   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6387   PetscFunctionReturn(PETSC_SUCCESS);
6388 }
6389 
6390 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6391 {
6392   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6393 
6394   PetscFunctionBegin;
6395   PetscCall(PetscSFDestroy(&coo->sf));
6396   PetscCall(PetscFree(coo->Aperm1));
6397   PetscCall(PetscFree(coo->Bperm1));
6398   PetscCall(PetscFree(coo->Ajmap1));
6399   PetscCall(PetscFree(coo->Bjmap1));
6400   PetscCall(PetscFree(coo->Aimap2));
6401   PetscCall(PetscFree(coo->Bimap2));
6402   PetscCall(PetscFree(coo->Aperm2));
6403   PetscCall(PetscFree(coo->Bperm2));
6404   PetscCall(PetscFree(coo->Ajmap2));
6405   PetscCall(PetscFree(coo->Bjmap2));
6406   PetscCall(PetscFree(coo->Cperm1));
6407   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6408   PetscCall(PetscFree(coo));
6409   PetscFunctionReturn(PETSC_SUCCESS);
6410 }
6411 
6412 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6413 {
6414   MPI_Comm             comm;
6415   PetscMPIInt          rank, size;
6416   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6417   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6418   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6419   PetscContainer       container;
6420   MatCOOStruct_MPIAIJ *coo;
6421 
6422   PetscFunctionBegin;
6423   PetscCall(PetscFree(mpiaij->garray));
6424   PetscCall(VecDestroy(&mpiaij->lvec));
6425 #if defined(PETSC_USE_CTABLE)
6426   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6427 #else
6428   PetscCall(PetscFree(mpiaij->colmap));
6429 #endif
6430   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6431   mat->assembled     = PETSC_FALSE;
6432   mat->was_assembled = PETSC_FALSE;
6433 
6434   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6435   PetscCallMPI(MPI_Comm_size(comm, &size));
6436   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6437   PetscCall(PetscLayoutSetUp(mat->rmap));
6438   PetscCall(PetscLayoutSetUp(mat->cmap));
6439   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6440   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6441   PetscCall(MatGetLocalSize(mat, &m, &n));
6442   PetscCall(MatGetSize(mat, &M, &N));
6443 
6444   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6445   /* entries come first, then local rows, then remote rows.                     */
6446   PetscCount n1 = coo_n, *perm1;
6447   PetscInt  *i1 = coo_i, *j1 = coo_j;
6448 
6449   PetscCall(PetscMalloc1(n1, &perm1));
6450   for (k = 0; k < n1; k++) perm1[k] = k;
6451 
6452   /* Manipulate indices so that entries with negative row or col indices will have smallest
6453      row indices, local entries will have greater but negative row indices, and remote entries
6454      will have positive row indices.
6455   */
6456   for (k = 0; k < n1; k++) {
6457     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6458     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6459     else {
6460       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6461       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6462     }
6463   }
6464 
6465   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6466   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6467 
6468   /* Advance k to the first entry we need to take care of */
6469   for (k = 0; k < n1; k++)
6470     if (i1[k] > PETSC_INT_MIN) break;
6471   PetscCount i1start = k;
6472 
6473   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6474   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6475 
6476   /*           Send remote rows to their owner                                  */
6477   /* Find which rows should be sent to which remote ranks*/
6478   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6479   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6480   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6481   const PetscInt *ranges;
6482   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6483 
6484   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6485   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6486   for (k = rem; k < n1;) {
6487     PetscMPIInt owner;
6488     PetscInt    firstRow, lastRow;
6489 
6490     /* Locate a row range */
6491     firstRow = i1[k]; /* first row of this owner */
6492     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6493     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6494 
6495     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6496     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6497 
6498     /* All entries in [k,p) belong to this remote owner */
6499     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6500       PetscMPIInt *sendto2;
6501       PetscInt    *nentries2;
6502       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6503 
6504       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6505       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6506       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6507       PetscCall(PetscFree2(sendto, nentries2));
6508       sendto   = sendto2;
6509       nentries = nentries2;
6510       maxNsend = maxNsend2;
6511     }
6512     sendto[nsend] = owner;
6513     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6514     nsend++;
6515     k = p;
6516   }
6517 
6518   /* Build 1st SF to know offsets on remote to send data */
6519   PetscSF      sf1;
6520   PetscInt     nroots = 1, nroots2 = 0;
6521   PetscInt     nleaves = nsend, nleaves2 = 0;
6522   PetscInt    *offsets;
6523   PetscSFNode *iremote;
6524 
6525   PetscCall(PetscSFCreate(comm, &sf1));
6526   PetscCall(PetscMalloc1(nsend, &iremote));
6527   PetscCall(PetscMalloc1(nsend, &offsets));
6528   for (k = 0; k < nsend; k++) {
6529     iremote[k].rank  = sendto[k];
6530     iremote[k].index = 0;
6531     nleaves2 += nentries[k];
6532     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6533   }
6534   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6535   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6536   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6537   PetscCall(PetscSFDestroy(&sf1));
6538   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6539 
6540   /* Build 2nd SF to send remote COOs to their owner */
6541   PetscSF sf2;
6542   nroots  = nroots2;
6543   nleaves = nleaves2;
6544   PetscCall(PetscSFCreate(comm, &sf2));
6545   PetscCall(PetscSFSetFromOptions(sf2));
6546   PetscCall(PetscMalloc1(nleaves, &iremote));
6547   p = 0;
6548   for (k = 0; k < nsend; k++) {
6549     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6550     for (q = 0; q < nentries[k]; q++, p++) {
6551       iremote[p].rank = sendto[k];
6552       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6553     }
6554   }
6555   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6556 
6557   /* Send the remote COOs to their owner */
6558   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6559   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6560   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6561   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6562   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6563   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6564   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6565   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6566   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6567   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6568   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6569 
6570   PetscCall(PetscFree(offsets));
6571   PetscCall(PetscFree2(sendto, nentries));
6572 
6573   /* Sort received COOs by row along with the permutation array     */
6574   for (k = 0; k < n2; k++) perm2[k] = k;
6575   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6576 
6577   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6578   PetscCount *Cperm1;
6579   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6580   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6581   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6582   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6583 
6584   /* Support for HYPRE matrices, kind of a hack.
6585      Swap min column with diagonal so that diagonal values will go first */
6586   PetscBool hypre;
6587   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6588   if (hypre) {
6589     PetscInt *minj;
6590     PetscBT   hasdiag;
6591 
6592     PetscCall(PetscBTCreate(m, &hasdiag));
6593     PetscCall(PetscMalloc1(m, &minj));
6594     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6595     for (k = i1start; k < rem; k++) {
6596       if (j1[k] < cstart || j1[k] >= cend) continue;
6597       const PetscInt rindex = i1[k] - rstart;
6598       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6599       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6600     }
6601     for (k = 0; k < n2; k++) {
6602       if (j2[k] < cstart || j2[k] >= cend) continue;
6603       const PetscInt rindex = i2[k] - rstart;
6604       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6605       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6606     }
6607     for (k = i1start; k < rem; k++) {
6608       const PetscInt rindex = i1[k] - rstart;
6609       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6610       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6611       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6612     }
6613     for (k = 0; k < n2; k++) {
6614       const PetscInt rindex = i2[k] - rstart;
6615       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6616       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6617       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6618     }
6619     PetscCall(PetscBTDestroy(&hasdiag));
6620     PetscCall(PetscFree(minj));
6621   }
6622 
6623   /* Split local COOs and received COOs into diag/offdiag portions */
6624   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6625   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6626   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6627   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6628   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6629   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6630 
6631   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6632   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6633   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6634   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6635 
6636   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6637   PetscInt *Ai, *Bi;
6638   PetscInt *Aj, *Bj;
6639 
6640   PetscCall(PetscMalloc1(m + 1, &Ai));
6641   PetscCall(PetscMalloc1(m + 1, &Bi));
6642   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6643   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6644 
6645   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6646   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6647   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6648   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6649   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6650 
6651   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6652   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6653 
6654   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6655   /* expect nonzeros in A/B most likely have local contributing entries        */
6656   PetscInt    Annz = Ai[m];
6657   PetscInt    Bnnz = Bi[m];
6658   PetscCount *Ajmap1_new, *Bjmap1_new;
6659 
6660   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6661   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6662 
6663   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6664   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6665 
6666   PetscCall(PetscFree(Aimap1));
6667   PetscCall(PetscFree(Ajmap1));
6668   PetscCall(PetscFree(Bimap1));
6669   PetscCall(PetscFree(Bjmap1));
6670   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6671   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6672   PetscCall(PetscFree(perm1));
6673   PetscCall(PetscFree3(i2, j2, perm2));
6674 
6675   Ajmap1 = Ajmap1_new;
6676   Bjmap1 = Bjmap1_new;
6677 
6678   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6679   if (Annz < Annz1 + Annz2) {
6680     PetscInt *Aj_new;
6681     PetscCall(PetscMalloc1(Annz, &Aj_new));
6682     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6683     PetscCall(PetscFree(Aj));
6684     Aj = Aj_new;
6685   }
6686 
6687   if (Bnnz < Bnnz1 + Bnnz2) {
6688     PetscInt *Bj_new;
6689     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6690     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6691     PetscCall(PetscFree(Bj));
6692     Bj = Bj_new;
6693   }
6694 
6695   /* Create new submatrices for on-process and off-process coupling                  */
6696   PetscScalar     *Aa, *Ba;
6697   MatType          rtype;
6698   Mat_SeqAIJ      *a, *b;
6699   PetscObjectState state;
6700   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6701   PetscCall(PetscCalloc1(Bnnz, &Ba));
6702   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6703   if (cstart) {
6704     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6705   }
6706 
6707   PetscCall(MatGetRootType_Private(mat, &rtype));
6708 
6709   MatSeqXAIJGetOptions_Private(mpiaij->A);
6710   PetscCall(MatDestroy(&mpiaij->A));
6711   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6712   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6713   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6714 
6715   MatSeqXAIJGetOptions_Private(mpiaij->B);
6716   PetscCall(MatDestroy(&mpiaij->B));
6717   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6718   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6719   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6720 
6721   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6722   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6723   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6724   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6725 
6726   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6727   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6728   a->free_a  = PETSC_TRUE;
6729   a->free_ij = PETSC_TRUE;
6730   b->free_a  = PETSC_TRUE;
6731   b->free_ij = PETSC_TRUE;
6732   a->maxnz   = a->nz;
6733   b->maxnz   = b->nz;
6734 
6735   /* conversion must happen AFTER multiply setup */
6736   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6737   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6738   PetscCall(VecDestroy(&mpiaij->lvec));
6739   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6740 
6741   // Put the COO struct in a container and then attach that to the matrix
6742   PetscCall(PetscMalloc1(1, &coo));
6743   coo->n       = coo_n;
6744   coo->sf      = sf2;
6745   coo->sendlen = nleaves;
6746   coo->recvlen = nroots;
6747   coo->Annz    = Annz;
6748   coo->Bnnz    = Bnnz;
6749   coo->Annz2   = Annz2;
6750   coo->Bnnz2   = Bnnz2;
6751   coo->Atot1   = Atot1;
6752   coo->Atot2   = Atot2;
6753   coo->Btot1   = Btot1;
6754   coo->Btot2   = Btot2;
6755   coo->Ajmap1  = Ajmap1;
6756   coo->Aperm1  = Aperm1;
6757   coo->Bjmap1  = Bjmap1;
6758   coo->Bperm1  = Bperm1;
6759   coo->Aimap2  = Aimap2;
6760   coo->Ajmap2  = Ajmap2;
6761   coo->Aperm2  = Aperm2;
6762   coo->Bimap2  = Bimap2;
6763   coo->Bjmap2  = Bjmap2;
6764   coo->Bperm2  = Bperm2;
6765   coo->Cperm1  = Cperm1;
6766   // Allocate in preallocation. If not used, it has zero cost on host
6767   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6768   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6769   PetscCall(PetscContainerSetPointer(container, coo));
6770   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6771   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6772   PetscCall(PetscContainerDestroy(&container));
6773   PetscFunctionReturn(PETSC_SUCCESS);
6774 }
6775 
6776 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6777 {
6778   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6779   Mat                  A = mpiaij->A, B = mpiaij->B;
6780   PetscScalar         *Aa, *Ba;
6781   PetscScalar         *sendbuf, *recvbuf;
6782   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6783   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6784   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6785   const PetscCount    *Cperm1;
6786   PetscContainer       container;
6787   MatCOOStruct_MPIAIJ *coo;
6788 
6789   PetscFunctionBegin;
6790   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6791   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6792   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6793   sendbuf = coo->sendbuf;
6794   recvbuf = coo->recvbuf;
6795   Ajmap1  = coo->Ajmap1;
6796   Ajmap2  = coo->Ajmap2;
6797   Aimap2  = coo->Aimap2;
6798   Bjmap1  = coo->Bjmap1;
6799   Bjmap2  = coo->Bjmap2;
6800   Bimap2  = coo->Bimap2;
6801   Aperm1  = coo->Aperm1;
6802   Aperm2  = coo->Aperm2;
6803   Bperm1  = coo->Bperm1;
6804   Bperm2  = coo->Bperm2;
6805   Cperm1  = coo->Cperm1;
6806 
6807   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6808   PetscCall(MatSeqAIJGetArray(B, &Ba));
6809 
6810   /* Pack entries to be sent to remote */
6811   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6812 
6813   /* Send remote entries to their owner and overlap the communication with local computation */
6814   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6815   /* Add local entries to A and B */
6816   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6817     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6818     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6819     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6820   }
6821   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6822     PetscScalar sum = 0.0;
6823     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6824     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6825   }
6826   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6827 
6828   /* Add received remote entries to A and B */
6829   for (PetscCount i = 0; i < coo->Annz2; i++) {
6830     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6831   }
6832   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6833     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6834   }
6835   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6836   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6837   PetscFunctionReturn(PETSC_SUCCESS);
6838 }
6839 
6840 /*MC
6841    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6842 
6843    Options Database Keys:
6844 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6845 
6846    Level: beginner
6847 
6848    Notes:
6849    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6850     in this case the values associated with the rows and columns one passes in are set to zero
6851     in the matrix
6852 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6855 
6856 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6857 M*/
/* MatCreate_MPIAIJ - Type constructor for MATMPIAIJ, called when MatSetType(B, MATMPIAIJ) is used.

   Allocates the Mat_MPIAIJ implementation struct, installs the shared function table
   (MatOps_Values), creates the stash that buffers off-process MatSetValues() insertions,
   and registers all composed methods (preallocation, COO assembly, conversions to other
   matrix types). The sequential A (diagonal) and B (off-diagonal) blocks are created
   later, at preallocation/assembly time, not here. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  /* Install the MPIAIJ method table (MatOps_Values is the file-scope ops struct) */
  B->ops[0]     = MatOps_Values;
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  /* colmap/garray (global-to-local column maps) are built lazily during assembly */
  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register composed methods; string-keyed so other translation units (and optional
     backends guarded by #if below) can look them up without link-time dependencies */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6941 
6942 /*@
6943   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6944   and "off-diagonal" part of the matrix in CSR format.
6945 
6946   Collective
6947 
6948   Input Parameters:
6949 + comm - MPI communicator
6950 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6951 . n    - This value should be the same as the local size used in creating the
6952          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6953          calculated if `N` is given) For square matrices `n` is almost always `m`.
6954 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6955 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6956 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6957 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6958 . a    - matrix values
6959 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6960 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6961 - oa   - matrix values
6962 
6963   Output Parameter:
6964 . mat - the matrix
6965 
6966   Level: advanced
6967 
6968   Notes:
6969   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6970   must free the arrays once the matrix has been destroyed and not before.
6971 
6972   The `i` and `j` indices are 0 based
6973 
6974   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6975 
6976   This sets local rows and cannot be used to set off-processor values.
6977 
6978   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6979   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6981   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6982   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6983   communication if it is known that only local entries will be set.
6984 
6985 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6986           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6987 @*/
6988 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6989 {
6990   Mat_MPIAIJ *maij;
6991 
6992   PetscFunctionBegin;
6993   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6994   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6995   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6996   PetscCall(MatCreate(comm, mat));
6997   PetscCall(MatSetSizes(*mat, m, n, M, N));
6998   PetscCall(MatSetType(*mat, MATMPIAIJ));
6999   maij = (Mat_MPIAIJ *)(*mat)->data;
7000 
7001   (*mat)->preallocated = PETSC_TRUE;
7002 
7003   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7004   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7005 
7006   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7007   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7008 
7009   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7010   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7011   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7012   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7013   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7014   PetscFunctionReturn(PETSC_SUCCESS);
7015 }
7016 
/* Product data for backend (possibly device-resident) MatProduct of MPIAIJ matrices:
   the global product C is computed as a series of local (SeqAIJ) intermediate products
   whose values are then inserted into C through its COO assembly interface. */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r; /* send/receive offsets reused on MAT_REUSE_MATRIX calls */
  PetscScalar *bufa;                  /* communication buffer reused on MAT_REUSE_MATRIX calls */
  Mat          P_oth;                 /* rows of P corresponding to off-diagonal columns of A */

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i]; own[0] anchors the shared index array */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i]; off[0] anchors the shared index array */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory type of the COO buffers (host/CUDA/HIP/Kokkos) */

  /* customization */
  PetscBool abmerge;    /* for AB: multiply A_diag by the merged local part of B instead of by B_diag and B_off separately */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
7047 
7048 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7049 {
7050   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7051   PetscInt             i;
7052 
7053   PetscFunctionBegin;
7054   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7055   PetscCall(PetscFree(mmdata->bufa));
7056   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7057   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7058   PetscCall(MatDestroy(&mmdata->P_oth));
7059   PetscCall(MatDestroy(&mmdata->Bloc));
7060   PetscCall(PetscSFDestroy(&mmdata->sf));
7061   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7062   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7063   PetscCall(PetscFree(mmdata->own[0]));
7064   PetscCall(PetscFree(mmdata->own));
7065   PetscCall(PetscFree(mmdata->off[0]));
7066   PetscCall(PetscFree(mmdata->off));
7067   PetscCall(PetscFree(mmdata));
7068   PetscFunctionReturn(PETSC_SUCCESS);
7069 }
7070 
7071 /* Copy selected n entries with indices in idx[] of A to v[].
7072    If idx is NULL, copy the whole data array of A to v[]
7073  */
7074 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7075 {
7076   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7077 
7078   PetscFunctionBegin;
7079   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7080   if (f) {
7081     PetscCall((*f)(A, n, idx, v));
7082   } else {
7083     const PetscScalar *vv;
7084 
7085     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7086     if (n && idx) {
7087       PetscScalar    *w  = v;
7088       const PetscInt *oi = idx;
7089       PetscInt        j;
7090 
7091       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7092     } else {
7093       PetscCall(PetscArraycpy(v, vv, n));
7094     }
7095     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7096   }
7097   PetscFunctionReturn(PETSC_SUCCESS);
7098 }
7099 
7100 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7101 {
7102   MatMatMPIAIJBACKEND *mmdata;
7103   PetscInt             i, n_d, n_o;
7104 
7105   PetscFunctionBegin;
7106   MatCheckProduct(C, 1);
7107   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7108   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7109   if (!mmdata->reusesym) { /* update temporary matrices */
7110     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7111     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7112   }
7113   mmdata->reusesym = PETSC_FALSE;
7114 
7115   for (i = 0; i < mmdata->cp; i++) {
7116     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7117     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7118   }
7119   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7120     PetscInt noff;
7121 
7122     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7123     if (mmdata->mptmp[i]) continue;
7124     if (noff) {
7125       PetscInt nown;
7126 
7127       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7128       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7129       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7130       n_o += noff;
7131       n_d += nown;
7132     } else {
7133       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7134 
7135       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7136       n_d += mm->nz;
7137     }
7138   }
7139   if (mmdata->hasoffproc) { /* offprocess insertion */
7140     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7141     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7142   }
7143   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7144   PetscFunctionReturn(PETSC_SUCCESS);
7145 }
7146 
7147 /* Support for Pt * A, A * P, or Pt * A * P */
7148 #define MAX_NUMBER_INTERMEDIATE 4
7149 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7150 {
7151   Mat_Product           *product = C->product;
7152   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7153   Mat_MPIAIJ            *a, *p;
7154   MatMatMPIAIJBACKEND   *mmdata;
7155   ISLocalToGlobalMapping P_oth_l2g = NULL;
7156   IS                     glob      = NULL;
7157   const char            *prefix;
7158   char                   pprefix[256];
7159   const PetscInt        *globidx, *P_oth_idx;
7160   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7161   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7162   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7163                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7164                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7165   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7166 
7167   MatProductType ptype;
7168   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7169   PetscMPIInt    size;
7170 
7171   PetscFunctionBegin;
7172   MatCheckProduct(C, 1);
7173   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7174   ptype = product->type;
7175   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7176     ptype                                          = MATPRODUCT_AB;
7177     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7178   }
7179   switch (ptype) {
7180   case MATPRODUCT_AB:
7181     A          = product->A;
7182     P          = product->B;
7183     m          = A->rmap->n;
7184     n          = P->cmap->n;
7185     M          = A->rmap->N;
7186     N          = P->cmap->N;
7187     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7188     break;
7189   case MATPRODUCT_AtB:
7190     P          = product->A;
7191     A          = product->B;
7192     m          = P->cmap->n;
7193     n          = A->cmap->n;
7194     M          = P->cmap->N;
7195     N          = A->cmap->N;
7196     hasoffproc = PETSC_TRUE;
7197     break;
7198   case MATPRODUCT_PtAP:
7199     A          = product->A;
7200     P          = product->B;
7201     m          = P->cmap->n;
7202     n          = P->cmap->n;
7203     M          = P->cmap->N;
7204     N          = P->cmap->N;
7205     hasoffproc = PETSC_TRUE;
7206     break;
7207   default:
7208     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7209   }
7210   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7211   if (size == 1) hasoffproc = PETSC_FALSE;
7212 
7213   /* defaults */
7214   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7215     mp[i]    = NULL;
7216     mptmp[i] = PETSC_FALSE;
7217     rmapt[i] = -1;
7218     cmapt[i] = -1;
7219     rmapa[i] = NULL;
7220     cmapa[i] = NULL;
7221   }
7222 
7223   /* customization */
7224   PetscCall(PetscNew(&mmdata));
7225   mmdata->reusesym = product->api_user;
7226   if (ptype == MATPRODUCT_AB) {
7227     if (product->api_user) {
7228       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7229       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7230       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7231       PetscOptionsEnd();
7232     } else {
7233       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7234       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7235       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7236       PetscOptionsEnd();
7237     }
7238   } else if (ptype == MATPRODUCT_PtAP) {
7239     if (product->api_user) {
7240       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7241       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7242       PetscOptionsEnd();
7243     } else {
7244       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7245       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7246       PetscOptionsEnd();
7247     }
7248   }
7249   a = (Mat_MPIAIJ *)A->data;
7250   p = (Mat_MPIAIJ *)P->data;
7251   PetscCall(MatSetSizes(C, m, n, M, N));
7252   PetscCall(PetscLayoutSetUp(C->rmap));
7253   PetscCall(PetscLayoutSetUp(C->cmap));
7254   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7255   PetscCall(MatGetOptionsPrefix(C, &prefix));
7256 
7257   cp = 0;
7258   switch (ptype) {
7259   case MATPRODUCT_AB: /* A * P */
7260     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7261 
7262     /* A_diag * P_local (merged or not) */
7263     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7264       /* P is product->B */
7265       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7266       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7267       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7268       PetscCall(MatProductSetFill(mp[cp], product->fill));
7269       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7270       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7271       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7272       mp[cp]->product->api_user = product->api_user;
7273       PetscCall(MatProductSetFromOptions(mp[cp]));
7274       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7275       PetscCall(ISGetIndices(glob, &globidx));
7276       rmapt[cp] = 1;
7277       cmapt[cp] = 2;
7278       cmapa[cp] = globidx;
7279       mptmp[cp] = PETSC_FALSE;
7280       cp++;
7281     } else { /* A_diag * P_diag and A_diag * P_off */
7282       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7283       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7284       PetscCall(MatProductSetFill(mp[cp], product->fill));
7285       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7286       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7287       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7288       mp[cp]->product->api_user = product->api_user;
7289       PetscCall(MatProductSetFromOptions(mp[cp]));
7290       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7291       rmapt[cp] = 1;
7292       cmapt[cp] = 1;
7293       mptmp[cp] = PETSC_FALSE;
7294       cp++;
7295       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7296       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7297       PetscCall(MatProductSetFill(mp[cp], product->fill));
7298       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7299       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7300       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7301       mp[cp]->product->api_user = product->api_user;
7302       PetscCall(MatProductSetFromOptions(mp[cp]));
7303       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7304       rmapt[cp] = 1;
7305       cmapt[cp] = 2;
7306       cmapa[cp] = p->garray;
7307       mptmp[cp] = PETSC_FALSE;
7308       cp++;
7309     }
7310 
7311     /* A_off * P_other */
7312     if (mmdata->P_oth) {
7313       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7314       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7315       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7316       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7317       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7318       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7319       PetscCall(MatProductSetFill(mp[cp], product->fill));
7320       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7321       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7322       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7323       mp[cp]->product->api_user = product->api_user;
7324       PetscCall(MatProductSetFromOptions(mp[cp]));
7325       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7326       rmapt[cp] = 1;
7327       cmapt[cp] = 2;
7328       cmapa[cp] = P_oth_idx;
7329       mptmp[cp] = PETSC_FALSE;
7330       cp++;
7331     }
7332     break;
7333 
7334   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7335     /* A is product->B */
7336     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7337     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7338       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7339       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7340       PetscCall(MatProductSetFill(mp[cp], product->fill));
7341       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7342       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7343       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7344       mp[cp]->product->api_user = product->api_user;
7345       PetscCall(MatProductSetFromOptions(mp[cp]));
7346       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7347       PetscCall(ISGetIndices(glob, &globidx));
7348       rmapt[cp] = 2;
7349       rmapa[cp] = globidx;
7350       cmapt[cp] = 2;
7351       cmapa[cp] = globidx;
7352       mptmp[cp] = PETSC_FALSE;
7353       cp++;
7354     } else {
7355       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7356       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7357       PetscCall(MatProductSetFill(mp[cp], product->fill));
7358       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7359       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7360       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7361       mp[cp]->product->api_user = product->api_user;
7362       PetscCall(MatProductSetFromOptions(mp[cp]));
7363       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7364       PetscCall(ISGetIndices(glob, &globidx));
7365       rmapt[cp] = 1;
7366       cmapt[cp] = 2;
7367       cmapa[cp] = globidx;
7368       mptmp[cp] = PETSC_FALSE;
7369       cp++;
7370       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7371       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7372       PetscCall(MatProductSetFill(mp[cp], product->fill));
7373       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7374       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7375       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7376       mp[cp]->product->api_user = product->api_user;
7377       PetscCall(MatProductSetFromOptions(mp[cp]));
7378       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7379       rmapt[cp] = 2;
7380       rmapa[cp] = p->garray;
7381       cmapt[cp] = 2;
7382       cmapa[cp] = globidx;
7383       mptmp[cp] = PETSC_FALSE;
7384       cp++;
7385     }
7386     break;
7387   case MATPRODUCT_PtAP:
7388     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7389     /* P is product->B */
7390     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7391     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7392     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7393     PetscCall(MatProductSetFill(mp[cp], product->fill));
7394     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7395     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7396     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7397     mp[cp]->product->api_user = product->api_user;
7398     PetscCall(MatProductSetFromOptions(mp[cp]));
7399     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7400     PetscCall(ISGetIndices(glob, &globidx));
7401     rmapt[cp] = 2;
7402     rmapa[cp] = globidx;
7403     cmapt[cp] = 2;
7404     cmapa[cp] = globidx;
7405     mptmp[cp] = PETSC_FALSE;
7406     cp++;
7407     if (mmdata->P_oth) {
7408       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7409       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7410       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7411       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7412       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7413       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7414       PetscCall(MatProductSetFill(mp[cp], product->fill));
7415       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7416       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7417       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7418       mp[cp]->product->api_user = product->api_user;
7419       PetscCall(MatProductSetFromOptions(mp[cp]));
7420       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7421       mptmp[cp] = PETSC_TRUE;
7422       cp++;
7423       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7424       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7425       PetscCall(MatProductSetFill(mp[cp], product->fill));
7426       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7427       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7428       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7429       mp[cp]->product->api_user = product->api_user;
7430       PetscCall(MatProductSetFromOptions(mp[cp]));
7431       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7432       rmapt[cp] = 2;
7433       rmapa[cp] = globidx;
7434       cmapt[cp] = 2;
7435       cmapa[cp] = P_oth_idx;
7436       mptmp[cp] = PETSC_FALSE;
7437       cp++;
7438     }
7439     break;
7440   default:
7441     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7442   }
7443   /* sanity check */
7444   if (size > 1)
7445     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7446 
7447   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7448   for (i = 0; i < cp; i++) {
7449     mmdata->mp[i]    = mp[i];
7450     mmdata->mptmp[i] = mptmp[i];
7451   }
7452   mmdata->cp             = cp;
7453   C->product->data       = mmdata;
7454   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7455   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7456 
7457   /* memory type */
7458   mmdata->mtype = PETSC_MEMTYPE_HOST;
7459   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7460   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7461   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7462   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7463   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7464   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7465 
7466   /* prepare coo coordinates for values insertion */
7467 
7468   /* count total nonzeros of those intermediate seqaij Mats
7469     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7470     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7471     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7472   */
7473   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7474     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7475     if (mptmp[cp]) continue;
7476     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7477       const PetscInt *rmap = rmapa[cp];
7478       const PetscInt  mr   = mp[cp]->rmap->n;
7479       const PetscInt  rs   = C->rmap->rstart;
7480       const PetscInt  re   = C->rmap->rend;
7481       const PetscInt *ii   = mm->i;
7482       for (i = 0; i < mr; i++) {
7483         const PetscInt gr = rmap[i];
7484         const PetscInt nz = ii[i + 1] - ii[i];
7485         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7486         else ncoo_oown += nz;                  /* this row is local */
7487       }
7488     } else ncoo_d += mm->nz;
7489   }
7490 
7491   /*
7492     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7493 
7494     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7495 
7496     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7497 
7498     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7499     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7500     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7501 
7502     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7503     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7504   */
7505   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7506   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7507 
7508   /* gather (i,j) of nonzeros inserted by remote procs */
7509   if (hasoffproc) {
7510     PetscSF  msf;
7511     PetscInt ncoo2, *coo_i2, *coo_j2;
7512 
7513     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7514     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7515     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7516 
7517     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7518       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7519       PetscInt   *idxoff = mmdata->off[cp];
7520       PetscInt   *idxown = mmdata->own[cp];
7521       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7522         const PetscInt *rmap = rmapa[cp];
7523         const PetscInt *cmap = cmapa[cp];
7524         const PetscInt *ii   = mm->i;
7525         PetscInt       *coi  = coo_i + ncoo_o;
7526         PetscInt       *coj  = coo_j + ncoo_o;
7527         const PetscInt  mr   = mp[cp]->rmap->n;
7528         const PetscInt  rs   = C->rmap->rstart;
7529         const PetscInt  re   = C->rmap->rend;
7530         const PetscInt  cs   = C->cmap->rstart;
7531         for (i = 0; i < mr; i++) {
7532           const PetscInt *jj = mm->j + ii[i];
7533           const PetscInt  gr = rmap[i];
7534           const PetscInt  nz = ii[i + 1] - ii[i];
7535           if (gr < rs || gr >= re) { /* this is an offproc row */
7536             for (j = ii[i]; j < ii[i + 1]; j++) {
7537               *coi++    = gr;
7538               *idxoff++ = j;
7539             }
7540             if (!cmapt[cp]) { /* already global */
7541               for (j = 0; j < nz; j++) *coj++ = jj[j];
7542             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7543               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7544             } else { /* offdiag */
7545               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7546             }
7547             ncoo_o += nz;
7548           } else { /* this is a local row */
7549             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7550           }
7551         }
7552       }
7553       mmdata->off[cp + 1] = idxoff;
7554       mmdata->own[cp + 1] = idxown;
7555     }
7556 
7557     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7558     PetscInt incoo_o;
7559     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7560     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7561     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7562     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7563     ncoo = ncoo_d + ncoo_oown + ncoo2;
7564     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7565     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7566     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7567     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7568     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7569     PetscCall(PetscFree2(coo_i, coo_j));
7570     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7571     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7572     coo_i = coo_i2;
7573     coo_j = coo_j2;
7574   } else { /* no offproc values insertion */
7575     ncoo = ncoo_d;
7576     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7577 
7578     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7579     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7580     PetscCall(PetscSFSetUp(mmdata->sf));
7581   }
7582   mmdata->hasoffproc = hasoffproc;
7583 
7584   /* gather (i,j) of nonzeros inserted locally */
7585   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7586     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7587     PetscInt       *coi  = coo_i + ncoo_d;
7588     PetscInt       *coj  = coo_j + ncoo_d;
7589     const PetscInt *jj   = mm->j;
7590     const PetscInt *ii   = mm->i;
7591     const PetscInt *cmap = cmapa[cp];
7592     const PetscInt *rmap = rmapa[cp];
7593     const PetscInt  mr   = mp[cp]->rmap->n;
7594     const PetscInt  rs   = C->rmap->rstart;
7595     const PetscInt  re   = C->rmap->rend;
7596     const PetscInt  cs   = C->cmap->rstart;
7597 
7598     if (mptmp[cp]) continue;
7599     if (rmapt[cp] == 1) { /* consecutive rows */
7600       /* fill coo_i */
7601       for (i = 0; i < mr; i++) {
7602         const PetscInt gr = i + rs;
7603         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7604       }
7605       /* fill coo_j */
7606       if (!cmapt[cp]) { /* type-0, already global */
7607         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7608       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7609         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7610       } else {                                            /* type-2, local to global for sparse columns */
7611         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7612       }
7613       ncoo_d += mm->nz;
7614     } else if (rmapt[cp] == 2) { /* sparse rows */
7615       for (i = 0; i < mr; i++) {
7616         const PetscInt *jj = mm->j + ii[i];
7617         const PetscInt  gr = rmap[i];
7618         const PetscInt  nz = ii[i + 1] - ii[i];
7619         if (gr >= rs && gr < re) { /* local rows */
7620           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7621           if (!cmapt[cp]) { /* type-0, already global */
7622             for (j = 0; j < nz; j++) *coj++ = jj[j];
7623           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7624             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7625           } else { /* type-2, local to global for sparse columns */
7626             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7627           }
7628           ncoo_d += nz;
7629         }
7630       }
7631     }
7632   }
7633   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7634   PetscCall(ISDestroy(&glob));
7635   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7636   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7637   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7638   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7639 
7640   /* set block sizes */
7641   A = product->A;
7642   P = product->B;
7643   switch (ptype) {
7644   case MATPRODUCT_PtAP:
7645     PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
7646     break;
7647   case MATPRODUCT_RARt:
7648     PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
7649     break;
7650   case MATPRODUCT_ABC:
7651     PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
7652     break;
7653   case MATPRODUCT_AB:
7654     PetscCall(MatSetBlockSizesFromMats(C, A, P));
7655     break;
7656   case MATPRODUCT_AtB:
7657     PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
7658     break;
7659   case MATPRODUCT_ABt:
7660     PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
7661     break;
7662   default:
7663     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
7664   }
7665 
7666   /* preallocate with COO data */
7667   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7668   PetscCall(PetscFree2(coo_i, coo_j));
7669   PetscFunctionReturn(PETSC_SUCCESS);
7670 }
7671 
/* Select the symbolic-product implementation for the MPIAIJ device "backend".

   The COO-based backend product is chosen only when (a) A and B share the same
   (device) matrix type and neither is bound to the CPU, and (b) the user has
   not requested the CPU path through the *_backend_cpu options.  Otherwise the
   function falls back to the plain MPIAIJ product selection. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* device path must be proven viable below */
  PetscBool usecpu = PETSC_FALSE; /* user override: force the CPU implementation */
#else
  PetscBool match = PETSC_TRUE; /* no device support compiled in; option queries below are skipped */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* only consider the backend when both operands stay on the device and have identical types */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on whether the user called the legacy API (MatMatMult etc.)
       or the MatProduct API; both set the same usecpu flag */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  /* install the backend symbolic phase only for the product types it implements */
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7742 
7743 /*
7744    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7745 
7746    n - the number of block indices in cc[]
7747    cc - the block indices (must be large enough to contain the indices)
7748 */
7749 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7750 {
7751   PetscInt        cnt = -1, nidx, j;
7752   const PetscInt *idx;
7753 
7754   PetscFunctionBegin;
7755   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7756   if (nidx) {
7757     cnt     = 0;
7758     cc[cnt] = idx[0] / bs;
7759     for (j = 1; j < nidx; j++) {
7760       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7761     }
7762   }
7763   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7764   *n = cnt + 1;
7765   PetscFunctionReturn(PETSC_SUCCESS);
7766 }
7767 
7768 /*
7769     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7770 
7771     ncollapsed - the number of block indices
7772     collapsed - the block indices (must be large enough to contain the indices)
7773 */
7774 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7775 {
7776   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7777 
7778   PetscFunctionBegin;
7779   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7780   for (i = start + 1; i < start + bs; i++) {
7781     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7782     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7783     cprevtmp = cprev;
7784     cprev    = merged;
7785     merged   = cprevtmp;
7786   }
7787   *ncollapsed = nprev;
7788   if (collapsed) *collapsed = cprev;
7789   PetscFunctionReturn(PETSC_SUCCESS);
7790 }
7791 
7792 /*
7793  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7794 
 Input Parameters:
+ Amat - matrix
. symmetrize - make the result symmetric
. scale - symmetrically scale the result so its diagonal entries become 1 (or -1)
. filter - if nonnegative, drop result entries whose magnitude is below this value
. index_size - number of entries in index[] (0 means use every row/column of each block)
- index - rows/columns within each block whose absolute values are summed to form the scalar graph entry
7799 
7800  Output Parameter:
7801  . a_Gmat - output scalar graph >= 0
7802 
7803 */
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c; /* a = diagonal part, b = off-diagonal part (NULL for SeqAIJ), c = loop cursor */
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local graph vertices: one per bs x bs block row */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* bs > 1: build an nloc x nloc scalar graph, collapsing each bs x bs block to one entry */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    /* fast path: requires fully dense bs x bs blocks; bails out to old_bs below otherwise */
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA; /* AA/AJ: per-block-row value and column scratch buffers */
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;

      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a             = d->A;
        b             = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
      /* first pass over diagonal (a) then off-diagonal (b) part: count block nonzeros
         per block row and verify every block is dense with consistent column pattern */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols1, *cols2;

        for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
          nnz[brow / bs] = nc2 / bs;
          if (nc2 % bs) ok = 0;
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
            if (nc1 != nc2) ok = 0;
            else {
              for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
                if (cols1[jj] != cols2[jj]) ok = 0;
                if (cols1[jj] % bs != jj % bs) ok = 0;
              }
            }
            PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
          }
          PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
            goto old_bs;
          }
        }
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;

        ai = aseq->i;
        n  = ai[brow + 1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (PetscInt k = 0; k < n; k += bs) {   // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          if (index_size == 0) {
            for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
              aa = aseq->a + ai[brow + ii] + k;
              for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
                val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
              }
            }
          } else {                                            // use (index,index) value if provided
            for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
              PetscInt ii = index[iii];
              aa          = aseq->a + ai[brow + ii] + k;
              for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
                PetscInt jj = index[jjj];
                val += PetscAbs(PetscRealPart(aa[jj]));
              }
            }
          }
          PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray; /* garray maps local off-diag columns to global indices */

        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first sweep sets the global block-column indices and zeros the accumulators */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          if (index_size == 0) {
            for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (PetscInt k = 0; k < ncols; k += bs) {
                for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
                  PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          } else {                                            // use (index,index) value if provided
            for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
              PetscInt ii = index[iii];
              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (PetscInt k = 0; k < ncols; k += bs) {
                for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
                  PetscInt jj = index[jjj];
                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      /* slow path: blocks may be sparse; collapse row-by-row with MatGetRow() */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
       */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;

        /*
         Determine exact preallocation count for (sequential) scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;

        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
         */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      /* each point entry adds |value| into its block's (dest_row, dest_col) graph entry */
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;

        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));

          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: the matrix is already a scalar graph; duplicate it only if it will be modified */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a             = d->A;
      b             = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;

        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        /* NOTE(review): jj is int while info.nz_used is a floating count — assumes the local
           number of nonzeros fits in int; confirm for very large local matrices */
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;

    /* G <- G + G^T unless the matrix is already known symmetric */
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;

      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;

    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  if (filter >= 0) {
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
8086 
8087 /*
8088     Special version for direct calls from Fortran
8089 */
8090 
/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

/* Like the normal SETERRQ, but reports the error through *_ierr and returns from the void Fortran wrapper */
#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the exported symbol below to this platform's Fortran name-mangling convention */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/* Fortran-callable fast path for MatSetValues() on MPIAIJ matrices.

   Mirrors MatSetValues_MPIAIJ(): locally owned entries are inserted directly into
   the diagonal (A) or off-diagonal (B) SeqAIJ part via the MatSetValues_SeqAIJ_*_Private
   macros; rows owned by other ranks are stashed for communication at assembly.
   Errors are reported through *_ierr using the PetscCall/SETERRQ macros redefined above. */
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) { /* row is owned by this rank */
        /* set up the search-window state the MatSetValues_SeqAIJ_*_Private macros expect */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) { /* column in the diagonal (A) block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else { /* column goes into the off-diagonal (B) block */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              /* column not present in the assembled pattern: disassemble and insert globally */
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) { /* off-process row: stash for MatAssemblyBegin/End communication */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8229 
/* Undefine the Fortran-specific PetscCall/SETERRQ redefinitions made above. No other PETSc
 * functions should be defined past this point in this file, as it is impossible to recover
 * the original macro definitions here */
#undef PetscCall
#undef SETERRQ
8235