xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b2ccae6bdc8edea944f1c160ca3b2eb32c69ecb2)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetUp_MPI_Hash(), MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because they are used to determine
43      the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
137    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140   for communicators controlling multiple processes.  It is recommended that you call both of
141   the above preallocation routines for simplicity.
142 
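  Example Usage:
  A minimal sketch (not taken from the PETSc examples); error checking is omitted, and comm, m, n, M, N and the
  preallocation counts 10 and 5 are placeholders to be replaced by the application.
.vb
  Mat A;

  MatCreate(comm, &A);
  MatSetSizes(A, m, n, M, N);
  MatSetType(A, MATAIJ);
  // call both preallocation routines; only the one matching the communicator is used, the other is ignored
  MatSeqAIJSetPreallocation(A, 10, NULL);          // per-row nonzero estimate for the single-process case
  MatMPIAIJSetPreallocation(A, 10, NULL, 5, NULL); // diagonal and off-diagonal block estimates for the parallel case
.ve
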
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
146   Level: beginner
147 
148   Developer Note:
149   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically
150   switches over to use inodes when enough exist.
151 
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
153 M*/
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity.
163 
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
166 
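  Example Usage:
  A sketch; ./app stands for any PETSc program that creates its matrix with `MatSetFromOptions()`.
.vb
  mpiexec -n 4 ./app -mat_type aijcrl
.ve
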
167   Level: beginner
168 
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from that of the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204 {
205   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
206   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
207   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
208   const PetscInt  *ia, *ib;
209   const MatScalar *aa, *bb, *aav, *bav;
210   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
211   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
212 
213   PetscFunctionBegin;
214   *keptrows = NULL;
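  /* *keptrows stays NULL, meaning all rows are kept, unless some process finds a completely zero row below */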
215 
216   ia = a->i;
217   ib = b->i;
218   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
219   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
220   for (i = 0; i < m; i++) {
221     na = ia[i + 1] - ia[i];
222     nb = ib[i + 1] - ib[i];
223     if (!na && !nb) {
224       cnt++;
225       goto ok1;
226     }
227     aa = aav + ia[i];
228     for (j = 0; j < na; j++) {
229       if (aa[j] != 0.0) goto ok1;
230     }
231     bb = PetscSafePointerPlusOffset(bav, ib[i]);
232     for (j = 0; j < nb; j++) {
233       if (bb[j] != 0.0) goto ok1;
234     }
235     cnt++;
236   ok1:;
237   }
238   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239   if (!n0rows) {
240     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
241     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
242     PetscFunctionReturn(PETSC_SUCCESS);
243   }
244   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
245   cnt = 0;
246   for (i = 0; i < m; i++) {
247     na = ia[i + 1] - ia[i];
248     nb = ib[i + 1] - ib[i];
249     if (!na && !nb) continue;
250     aa = aav + ia[i];
251     for (j = 0; j < na; j++) {
252       if (aa[j] != 0.0) {
253         rows[cnt++] = rstart + i;
254         goto ok2;
255       }
256     }
257     bb = PetscSafePointerPlusOffset(bav, ib[i]);
258     for (j = 0; j < nb; j++) {
259       if (bb[j] != 0.0) {
260         rows[cnt++] = rstart + i;
261         goto ok2;
262       }
263     }
264   ok2:;
265   }
266   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
267   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
268   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302 {
303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304   PetscInt           i, m, n, *garray = aij->garray;
305   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
306   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
307   PetscReal         *work;
308   const PetscScalar *dummy;
309 
310   PetscFunctionBegin;
311   PetscCall(MatGetSize(A, &m, &n));
312   PetscCall(PetscCalloc1(n, &work));
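  /* the get/restore pairs below sync any device-side values to the host, since a_aij->a and b_aij->a are read directly afterwards */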
313   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
314   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
315   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
316   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
317   if (type == NORM_2) {
318     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
319     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
320   } else if (type == NORM_1) {
321     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
322     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
323   } else if (type == NORM_INFINITY) {
324     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
325     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
327     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
328     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
329   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
330     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
331     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
332   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
333   if (type == NORM_INFINITY) {
334     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
335   } else {
336     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
337   }
338   PetscCall(PetscFree(work));
339   if (type == NORM_2) {
340     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
341   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
342     for (i = 0; i < n; i++) reductions[i] /= m;
343   }
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
346 
347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
348 {
349   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
350   IS              sis, gis;
351   const PetscInt *isis, *igis;
352   PetscInt        n, *iis, nsis, ngis, rstart, i;
353 
354   PetscFunctionBegin;
355   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
356   PetscCall(MatFindNonzeroRows(a->B, &gis));
357   PetscCall(ISGetSize(gis, &ngis));
358   PetscCall(ISGetSize(sis, &nsis));
359   PetscCall(ISGetIndices(sis, &isis));
360   PetscCall(ISGetIndices(gis, &igis));
361 
362   PetscCall(PetscMalloc1(ngis + nsis, &iis));
363   PetscCall(PetscArraycpy(iis, igis, ngis));
364   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
365   n = ngis + nsis;
366   PetscCall(PetscSortRemoveDupsInt(&n, iis));
367   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
368   for (i = 0; i < n; i++) iis[i] += rstart;
369   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
370 
371   PetscCall(ISRestoreIndices(sis, &isis));
372   PetscCall(ISRestoreIndices(gis, &igis));
373   PetscCall(ISDestroy(&sis));
374   PetscCall(ISDestroy(&gis));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*
379   Local utility routine that creates a mapping from the global column
380 number to the local number in the off-diagonal part of the local
381 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
382 a slightly higher hash table cost; without it, it is not scalable (each process
383 stores an order-N integer array) but access is fast.
384 */
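/*
  Illustration: with garray = {3, 7, 12}, global column 7 of the matrix corresponds to local column 1 of B;
  the map stores the index shifted by one (a lookup of 7 gives 2) so that a result of 0 can mean "not present".
*/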
385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
386 {
387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
388   PetscInt    n   = aij->B->cmap->n, i;
389 
390   PetscFunctionBegin;
391   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
392 #if defined(PETSC_USE_CTABLE)
393   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
394   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
395 #else
396   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
397   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
398 #endif
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
401 
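/*
  The two macros below insert (or add to) a single value at (row, col) of the diagonal (A) or off-diagonal (B)
  block: a short bisection narrows the search range, a linear scan locates the column within the row, and, if the
  column is not yet present and new nonzeros are allowed, the later entries of the row are shifted up to make room.
*/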
402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
403   do { \
404     if (col <= lastcol1) low1 = 0; \
405     else high1 = nrow1; \
406     lastcol1 = col; \
407     while (high1 - low1 > 5) { \
408       t = (low1 + high1) / 2; \
409       if (rp1[t] > col) high1 = t; \
410       else low1 = t; \
411     } \
412     for (_i = low1; _i < high1; _i++) { \
413       if (rp1[_i] > col) break; \
414       if (rp1[_i] == col) { \
415         if (addv == ADD_VALUES) { \
416           ap1[_i] += value; \
417           /* Not sure whether LogFlops will slow down the code or not */ \
418           (void)PetscLogFlops(1.0); \
419         } else ap1[_i] = value; \
420         goto a_noinsert; \
421       } \
422     } \
423     if (value == 0.0 && ignorezeroentries && row != col) { \
424       low1  = 0; \
425       high1 = nrow1; \
426       goto a_noinsert; \
427     } \
428     if (nonew == 1) { \
429       low1  = 0; \
430       high1 = nrow1; \
431       goto a_noinsert; \
432     } \
433     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
434     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
435     N = nrow1++ - 1; \
436     a->nz++; \
437     high1++; \
438     /* shift up all the later entries in this row */ \
439     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
440     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
441     rp1[_i] = col; \
442     ap1[_i] = value; \
443   a_noinsert:; \
444     ailen[row] = nrow1; \
445   } while (0)
446 
447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
448   do { \
449     if (col <= lastcol2) low2 = 0; \
450     else high2 = nrow2; \
451     lastcol2 = col; \
452     while (high2 - low2 > 5) { \
453       t = (low2 + high2) / 2; \
454       if (rp2[t] > col) high2 = t; \
455       else low2 = t; \
456     } \
457     for (_i = low2; _i < high2; _i++) { \
458       if (rp2[_i] > col) break; \
459       if (rp2[_i] == col) { \
460         if (addv == ADD_VALUES) { \
461           ap2[_i] += value; \
462           (void)PetscLogFlops(1.0); \
463         } else ap2[_i] = value; \
464         goto b_noinsert; \
465       } \
466     } \
467     if (value == 0.0 && ignorezeroentries) { \
468       low2  = 0; \
469       high2 = nrow2; \
470       goto b_noinsert; \
471     } \
472     if (nonew == 1) { \
473       low2  = 0; \
474       high2 = nrow2; \
475       goto b_noinsert; \
476     } \
477     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
478     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
479     N = nrow2++ - 1; \
480     b->nz++; \
481     high2++; \
482     /* shift up all the later entries in this row */ \
483     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
484     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
485     rp2[_i] = col; \
486     ap2[_i] = value; \
487   b_noinsert:; \
488     bilen[row] = nrow2; \
489   } while (0)
490 
491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
492 {
493   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
494   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
495   PetscInt     l, *garray                         = mat->garray, diag;
496   PetscScalar *aa, *ba;
497 
498   PetscFunctionBegin;
499   /* code only works for square matrices A */
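  /* v is expected to hold the entire local row in global column order: the off-diagonal entries to the left of
     the diagonal block (stored in B), then the diagonal block entries (stored in A), then the remaining entries of B */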
500 
501   /* find size of row to the left of the diagonal part */
502   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
503   row = row - diag;
504   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
505     if (garray[b->j[b->i[row] + l]] > diag) break;
506   }
507   if (l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512 
513   /* diagonal part */
514   if (a->i[row + 1] - a->i[row]) {
515     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
516     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
517     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
518   }
519 
520   /* right of diagonal part */
521   if (b->i[row + 1] - b->i[row] - l) {
522     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
523     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
524     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
525   }
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
530 {
531   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
532   PetscScalar value = 0.0;
533   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
534   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
535   PetscBool   roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat         A     = aij->A;
539   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
540   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
541   PetscBool   ignorezeroentries = a->ignorezeroentries;
542   Mat         B                 = aij->B;
543   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
544   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
545   MatScalar  *aa, *ba;
546   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
547   PetscInt    nonew;
548   MatScalar  *ap1, *ap2;
549 
550   PetscFunctionBegin;
551   PetscCall(MatSeqAIJGetArray(A, &aa));
552   PetscCall(MatSeqAIJGetArray(B, &ba));
553   for (i = 0; i < m; i++) {
554     if (im[i] < 0) continue;
555     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
560       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
567       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j = 0; j < n; j++) {
574         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
575         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
580         } else if (in[j] < 0) {
581           continue;
582         } else {
583           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
584           if (mat->was_assembled) {
585             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
586 #if defined(PETSC_USE_CTABLE)
587             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
593               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
594               col = in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ *)B->data;
598               bimax = b->imax;
599               bi    = b->i;
600               bilen = b->ilen;
601               bj    = b->j;
602               ba    = b->a;
603               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
604               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
612               PetscCheck(1 == ((Mat_SeqAIJ *)aij->B->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
613               PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
618         }
619       }
620     } else {
621       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
626         } else {
627           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
628         }
629       }
630     }
631   }
632   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
633   PetscCall(MatSeqAIJRestoreArray(B, &ba));
634   PetscFunctionReturn(PETSC_SUCCESS);
635 }
636 
637 /*
638     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
639     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
640     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
641 */
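/*
  Illustration (hypothetical data): with cstart = 4 and cend = 8, a row with global columns {1, 5, 9} stores the
  shifted column {1} in the diagonal part (5 - cstart) and the global columns {1, 9} in the off-diagonal part,
  giving ailen = 1 and bilen = 2 for that row.
*/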
642 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
643 {
644   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
645   Mat         A      = aij->A; /* diagonal part of the matrix */
646   Mat         B      = aij->B; /* off-diagonal part of the matrix */
647   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
648   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
649   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
650   PetscInt   *ailen = a->ilen, *aj = a->j;
651   PetscInt   *bilen = b->ilen, *bj = b->j;
652   PetscInt    am          = aij->A->rmap->n, j;
653   PetscInt    diag_so_far = 0, dnz;
654   PetscInt    offd_so_far = 0, onz;
655 
656   PetscFunctionBegin;
657   /* Iterate over all rows of the matrix */
658   for (j = 0; j < am; j++) {
659     dnz = onz = 0;
660     /*  Iterate over all non-zero columns of the current row */
661     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
662       /* If column is in the diagonal */
663       if (mat_j[col] >= cstart && mat_j[col] < cend) {
664         aj[diag_so_far++] = mat_j[col] - cstart;
665         dnz++;
666       } else { /* off-diagonal entries */
667         bj[offd_so_far++] = mat_j[col];
668         onz++;
669       }
670     }
671     ailen[j] = dnz;
672     bilen[j] = onz;
673   }
674   PetscFunctionReturn(PETSC_SUCCESS);
675 }
676 
677 /*
678     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
679     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
680     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
681     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
682     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
683 */
684 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
685 {
686   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
687   Mat          A    = aij->A; /* diagonal part of the matrix */
688   Mat          B    = aij->B; /* off-diagonal part of the matrix */
689   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
690   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
691   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
692   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
693   PetscInt    *ailen = a->ilen, *aj = a->j;
694   PetscInt    *bilen = b->ilen, *bj = b->j;
695   PetscInt     am          = aij->A->rmap->n, j;
696   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
697   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
698   PetscScalar *aa = a->a, *ba = b->a;
699 
700   PetscFunctionBegin;
701   /* Iterate over all rows of the matrix */
702   for (j = 0; j < am; j++) {
703     dnz_row = onz_row = 0;
704     rowstart_offd     = full_offd_i[j];
705     rowstart_diag     = full_diag_i[j];
706     /*  Iterate over all non-zero columns of the current row */
707     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
708       /* If column is in the diagonal */
709       if (mat_j[col] >= cstart && mat_j[col] < cend) {
710         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
711         aa[rowstart_diag + dnz_row] = mat_a[col];
712         dnz_row++;
713       } else { /* off-diagonal entries */
714         bj[rowstart_offd + onz_row] = mat_j[col];
715         ba[rowstart_offd + onz_row] = mat_a[col];
716         onz_row++;
717       }
718     }
719     ailen[j] = dnz_row;
720     bilen[j] = onz_row;
721   }
722   PetscFunctionReturn(PETSC_SUCCESS);
723 }
724 
725 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
726 {
727   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
728   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
729   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
730 
731   PetscFunctionBegin;
732   for (i = 0; i < m; i++) {
733     if (idxm[i] < 0) continue; /* negative row */
734     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
735     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
736     row = idxm[i] - rstart;
737     for (j = 0; j < n; j++) {
738       if (idxn[j] < 0) continue; /* negative column */
739       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
740       if (idxn[j] >= cstart && idxn[j] < cend) {
741         col = idxn[j] - cstart;
742         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
743       } else {
744         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
745 #if defined(PETSC_USE_CTABLE)
746         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
747         col--;
748 #else
749         col = aij->colmap[idxn[j]] - 1;
750 #endif
751         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
752         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
753       }
754     }
755   }
756   PetscFunctionReturn(PETSC_SUCCESS);
757 }
758 
759 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
760 {
761   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
762   PetscInt    nstash, reallocs;
763 
764   PetscFunctionBegin;
765   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
766 
767   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
768   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
769   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
770   PetscFunctionReturn(PETSC_SUCCESS);
771 }
772 
773 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
774 {
775   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
776   PetscMPIInt  n;
777   PetscInt     i, j, rstart, ncols, flg;
778   PetscInt    *row, *col;
779   PetscBool    all_assembled;
780   PetscScalar *val;
781 
782   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
783 
784   PetscFunctionBegin;
785   if (!aij->donotstash && !mat->nooffprocentries) {
786     while (1) {
787       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
788       if (!flg) break;
789 
790       for (i = 0; i < n;) {
791         /* Now identify the consecutive vals belonging to the same row */
792         for (j = i, rstart = row[j]; j < n; j++) {
793           if (row[j] != rstart) break;
794         }
795         if (j < n) ncols = j - i;
796         else ncols = n - i;
797         /* Now assemble all these values with a single function call */
798         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
799         i = j;
800       }
801     }
802     PetscCall(MatStashScatterEnd_Private(&mat->stash));
803   }
804 #if defined(PETSC_HAVE_DEVICE)
805   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
806   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
807   if (mat->boundtocpu) {
808     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
809     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
810   }
811 #endif
812   PetscCall(MatAssemblyBegin(aij->A, mode));
813   PetscCall(MatAssemblyEnd(aij->A, mode));
814 
815   /* determine if any process has disassembled; if so, we must
816      also disassemble ourselves so that we may reassemble. */
817   /*
818      if the nonzero structure of submatrix B cannot change then we know that
819      no process disassembled, and thus we can skip this check
820   */
821   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
822     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
823     if (mat->was_assembled && !all_assembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
824       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
825     }
826   }
827   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
828   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
829 #if defined(PETSC_HAVE_DEVICE)
830   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
831 #endif
832   PetscCall(MatAssemblyBegin(aij->B, mode));
833   PetscCall(MatAssemblyEnd(aij->B, mode));
834 
835   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
836 
837   aij->rowvalues = NULL;
838 
839   PetscCall(VecDestroy(&aij->diag));
840 
841   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
842   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
843     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
844     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
845   }
846 #if defined(PETSC_HAVE_DEVICE)
847   mat->offloadmask = PETSC_OFFLOAD_BOTH;
848 #endif
849   PetscFunctionReturn(PETSC_SUCCESS);
850 }
851 
852 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
853 {
854   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
855 
856   PetscFunctionBegin;
857   PetscCall(MatZeroEntries(l->A));
858   PetscCall(MatZeroEntries(l->B));
859   PetscFunctionReturn(PETSC_SUCCESS);
860 }
861 
862 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
863 {
864   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
865   PetscInt   *lrows;
866   PetscInt    r, len;
867   PetscBool   cong;
868 
869   PetscFunctionBegin;
870   /* get locally owned rows */
871   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
872   PetscCall(MatHasCongruentLayouts(A, &cong));
873   /* fix right-hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar       *bb;
877 
878     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
879     PetscCall(VecGetArrayRead(x, &xx));
880     PetscCall(VecGetArray(b, &bb));
881     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
882     PetscCall(VecRestoreArrayRead(x, &xx));
883     PetscCall(VecRestoreArray(b, &bb));
884   }
885 
886   if (diag != 0.0 && cong) {
887     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
888     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
889   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
890     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
891     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
892     PetscInt    nnwA, nnwB;
893     PetscBool   nnzA, nnzB;
894 
895     nnwA = aijA->nonew;
896     nnwB = aijB->nonew;
897     nnzA = aijA->keepnonzeropattern;
898     nnzB = aijB->keepnonzeropattern;
899     if (!nnzA) {
900       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
901       aijA->nonew = 0;
902     }
903     if (!nnzB) {
904       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
905       aijB->nonew = 0;
906     }
907     /* Must zero here before the next loop */
908     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
909     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
910     for (r = 0; r < len; ++r) {
911       const PetscInt row = lrows[r] + A->rmap->rstart;
912       if (row >= A->cmap->N) continue;
913       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
914     }
915     aijA->nonew = nnwA;
916     aijB->nonew = nnwB;
917   } else {
918     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
919     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
920   }
921   PetscCall(PetscFree(lrows));
922   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
923   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
927     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
928     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
929   }
930   PetscFunctionReturn(PETSC_SUCCESS);
931 }
932 
933 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
934 {
935   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
936   PetscInt           n = A->rmap->n;
937   PetscInt           i, j, r, m, len = 0;
938   PetscInt          *lrows, *owners = A->rmap->range;
939   PetscMPIInt        p = 0;
940   PetscSFNode       *rrows;
941   PetscSF            sf;
942   const PetscScalar *xx;
943   PetscScalar       *bb, *mask, *aij_a;
944   Vec                xmask, lmask;
945   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
946   const PetscInt    *aj, *ii, *ridx;
947   PetscScalar       *aa;
948 
949   PetscFunctionBegin;
950   /* Create SF where leaves are input rows and roots are owned rows */
951   PetscCall(PetscMalloc1(n, &lrows));
952   for (r = 0; r < n; ++r) lrows[r] = -1;
953   PetscCall(PetscMalloc1(N, &rrows));
954   for (r = 0; r < N; ++r) {
955     const PetscInt idx = rows[r];
956     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
957     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
958       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
959     }
960     rrows[r].rank  = p;
961     rrows[r].index = rows[r] - owners[p];
962   }
963   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
964   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
965   /* Collect flags for rows to be zeroed */
966   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
967   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
968   PetscCall(PetscSFDestroy(&sf));
969   /* Compress and put in row numbers */
970   for (r = 0; r < n; ++r)
971     if (lrows[r] >= 0) lrows[len++] = r;
972   /* zero diagonal part of matrix */
973   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
974   /* handle off-diagonal part of matrix */
975   PetscCall(MatCreateVecs(A, &xmask, NULL));
976   PetscCall(VecDuplicate(l->lvec, &lmask));
977   PetscCall(VecGetArray(xmask, &bb));
978   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
979   PetscCall(VecRestoreArray(xmask, &bb));
980   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
981   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
982   PetscCall(VecDestroy(&xmask));
983   if (x && b) { /* this code is buggy when the row and column layouts don't match */
984     PetscBool cong;
985 
986     PetscCall(MatHasCongruentLayouts(A, &cong));
987     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
988     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
989     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
990     PetscCall(VecGetArrayRead(l->lvec, &xx));
991     PetscCall(VecGetArray(b, &bb));
992   }
993   PetscCall(VecGetArray(lmask, &mask));
994   /* remove zeroed rows of off-diagonal matrix */
995   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
996   ii = aij->i;
997   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
998   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
999   if (aij->compressedrow.use) {
1000     m    = aij->compressedrow.nrows;
1001     ii   = aij->compressedrow.i;
1002     ridx = aij->compressedrow.rindex;
1003     for (i = 0; i < m; i++) {
1004       n  = ii[i + 1] - ii[i];
1005       aj = aij->j + ii[i];
1006       aa = aij_a + ii[i];
1007 
1008       for (j = 0; j < n; j++) {
1009         if (PetscAbsScalar(mask[*aj])) {
1010           if (b) bb[*ridx] -= *aa * xx[*aj];
1011           *aa = 0.0;
1012         }
1013         aa++;
1014         aj++;
1015       }
1016       ridx++;
1017     }
1018   } else { /* do not use compressed row format */
1019     m = l->B->rmap->n;
1020     for (i = 0; i < m; i++) {
1021       n  = ii[i + 1] - ii[i];
1022       aj = aij->j + ii[i];
1023       aa = aij_a + ii[i];
1024       for (j = 0; j < n; j++) {
1025         if (PetscAbsScalar(mask[*aj])) {
1026           if (b) bb[i] -= *aa * xx[*aj];
1027           *aa = 0.0;
1028         }
1029         aa++;
1030         aj++;
1031       }
1032     }
1033   }
1034   if (x && b) {
1035     PetscCall(VecRestoreArray(b, &bb));
1036     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1037   }
1038   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1039   PetscCall(VecRestoreArray(lmask, &mask));
1040   PetscCall(VecDestroy(&lmask));
1041   PetscCall(PetscFree(lrows));
1042 
1043   /* only change matrix nonzero state if pattern was allowed to be changed */
1044   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1045     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1046     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1047   }
1048   PetscFunctionReturn(PETSC_SUCCESS);
1049 }
1050 
1051 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1052 {
1053   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1054   PetscInt    nt;
1055   VecScatter  Mvctx = a->Mvctx;
1056 
1057   PetscFunctionBegin;
1058   PetscCall(VecGetLocalSize(xx, &nt));
1059   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1060   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1061   PetscUseTypeMethod(a->A, mult, xx, yy);
1062   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1063   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1064   PetscFunctionReturn(PETSC_SUCCESS);
1065 }
1066 
1067 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1068 {
1069   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1070 
1071   PetscFunctionBegin;
1072   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1073   PetscFunctionReturn(PETSC_SUCCESS);
1074 }
1075 
1076 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1077 {
1078   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1079   VecScatter  Mvctx = a->Mvctx;
1080 
1081   PetscFunctionBegin;
1082   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1083   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1084   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1085   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
1089 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1090 {
1091   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1092 
1093   PetscFunctionBegin;
1094   /* do nondiagonal part */
1095   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1096   /* do local part */
1097   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1098   /* add partial results together */
1099   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1100   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1101   PetscFunctionReturn(PETSC_SUCCESS);
1102 }
1103 
1104 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1105 {
1106   MPI_Comm    comm;
1107   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1108   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1109   IS          Me, Notme;
1110   PetscInt    M, N, first, last, *notme, i;
1111   PetscBool   lf;
1112   PetscMPIInt size;
1113 
1114   PetscFunctionBegin;
1115   /* Easy test: are the diagonal blocks transposes of each other? */
1116   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1117   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1118   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1119   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1120   PetscCallMPI(MPI_Comm_size(comm, &size));
1121   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1122 
1123   /* Hard test: off-diagonal block. This requires a MatCreateSubMatrices() call. */
1124   PetscCall(MatGetSize(Amat, &M, &N));
1125   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1126   PetscCall(PetscMalloc1(N - last + first, &notme));
1127   for (i = 0; i < first; i++) notme[i] = i;
1128   for (i = last; i < M; i++) notme[i - last + first] = i;
1129   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1130   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1131   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1132   Aoff = Aoffs[0];
1133   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1134   Boff = Boffs[0];
1135   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1136   PetscCall(MatDestroyMatrices(1, &Aoffs));
1137   PetscCall(MatDestroyMatrices(1, &Boffs));
1138   PetscCall(ISDestroy(&Me));
1139   PetscCall(ISDestroy(&Notme));
1140   PetscCall(PetscFree(notme));
1141   PetscFunctionReturn(PETSC_SUCCESS);
1142 }
1143 
1144 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   /* do nondiagonal part */
1150   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1151   /* do local part */
1152   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1153   /* add partial results together */
1154   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1155   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscFunctionReturn(PETSC_SUCCESS);
1157 }
1158 
1159 /*
1160   This only works correctly for square matrices where the subblock A->A is the
1161    diagonal block
1162 */
1163 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1164 {
1165   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1166 
1167   PetscFunctionBegin;
1168   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1169   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1170   PetscCall(MatGetDiagonal(a->A, v));
1171   PetscFunctionReturn(PETSC_SUCCESS);
1172 }
1173 
1174 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1175 {
1176   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1177 
1178   PetscFunctionBegin;
1179   PetscCall(MatScale(a->A, aa));
1180   PetscCall(MatScale(a->B, aa));
1181   PetscFunctionReturn(PETSC_SUCCESS);
1182 }
1183 
1184 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1185 {
1186   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1187   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1188   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1189   const PetscInt    *garray = aij->garray;
1190   const PetscScalar *aa, *ba;
1191   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1192   PetscInt64         nz, hnz;
1193   PetscInt          *rowlens;
1194   PetscInt          *colidxs;
1195   PetscScalar       *matvals;
1196   PetscMPIInt        rank;
1197 
1198   PetscFunctionBegin;
1199   PetscCall(PetscViewerSetUp(viewer));
1200 
1201   M  = mat->rmap->N;
1202   N  = mat->cmap->N;
1203   m  = mat->rmap->n;
1204   rs = mat->rmap->rstart;
1205   cs = mat->cmap->rstart;
1206   nz = A->nz + B->nz;
1207 
1208   /* write matrix header */
1209   header[0] = MAT_FILE_CLASSID;
1210   header[1] = M;
1211   header[2] = N;
1212   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1213   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1214   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1215   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1216 
1217   /* fill in and store row lengths  */
1218   PetscCall(PetscMalloc1(m, &rowlens));
1219   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1220   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1221   PetscCall(PetscFree(rowlens));
1222 
1223   /* fill in and store column indices */
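  /* per row: off-diagonal entries whose global column lies below the diagonal block, then the diagonal block entries,
     then the remaining off-diagonal entries, so the indices come out in ascending global order */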
1224   PetscCall(PetscMalloc1(nz, &colidxs));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       colidxs[cnt++] = garray[B->j[jb]];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1231     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1232   }
1233   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1234   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1235   PetscCall(PetscFree(colidxs));
1236 
1237   /* fill in and store nonzero values */
1238   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1239   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1240   PetscCall(PetscMalloc1(nz, &matvals));
1241   for (cnt = 0, i = 0; i < m; i++) {
1242     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1243       if (garray[B->j[jb]] > cs) break;
1244       matvals[cnt++] = ba[jb];
1245     }
1246     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1247     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1248   }
1249   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1250   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1251   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1252   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1253   PetscCall(PetscFree(matvals));
1254 
1255   /* write block size option to the viewer's .info file */
1256   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1257   PetscFunctionReturn(PETSC_SUCCESS);
1258 }
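
/*
   For reference, the binary layout written above (the standard PETSc binary matrix format):

     header[4]    = { MAT_FILE_CLASSID, M, N, global nnz }                          (PETSC_INT)
     rowlens[M]   = number of nonzeros in each global row                           (PETSC_INT)
     colidxs[nnz] = global column of each nonzero, row by row, ascending per row    (PETSC_INT)
     matvals[nnz] = value of each nonzero, same ordering                            (PETSC_SCALAR)

   A minimal sketch of reading such a file back in (the file name is only an example):

     Mat         B;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "mat.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
     PetscCall(MatSetType(B, MATMPIAIJ));
     PetscCall(MatLoad(B, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/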
1259 
1260 #include <petscdraw.h>
1261 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1262 {
1263   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1264   PetscMPIInt       rank = aij->rank, size = aij->size;
1265   PetscBool         isdraw, isascii, isbinary;
1266   PetscViewer       sviewer;
1267   PetscViewerFormat format;
1268 
1269   PetscFunctionBegin;
1270   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1271   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1272   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1273   if (isascii) {
1274     PetscCall(PetscViewerGetFormat(viewer, &format));
1275     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1276       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1277       PetscCall(PetscMalloc1(size, &nz));
1278       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1279       for (i = 0; i < size; i++) {
1280         nmax = PetscMax(nmax, nz[i]);
1281         nmin = PetscMin(nmin, nz[i]);
1282         navg += nz[i];
1283       }
1284       PetscCall(PetscFree(nz));
1285       navg = navg / size;
1286       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1287       PetscFunctionReturn(PETSC_SUCCESS);
1288     }
1289     PetscCall(PetscViewerGetFormat(viewer, &format));
1290     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1291       MatInfo   info;
1292       PetscInt *inodes = NULL;
1293 
1294       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1295       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1296       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1297       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1298       if (!inodes) {
1299         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1300                                                      info.memory));
1301       } else {
1302         PetscCall(
1303           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
1304       }
1305       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1306       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1307       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1308       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1309       PetscCall(PetscViewerFlush(viewer));
1310       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1311       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1312       PetscCall(VecScatterView(aij->Mvctx, viewer));
1313       PetscFunctionReturn(PETSC_SUCCESS);
1314     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1315       PetscInt inodecount, inodelimit, *inodes;
1316       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1317       if (inodes) {
1318         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1319       } else {
1320         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1321       }
1322       PetscFunctionReturn(PETSC_SUCCESS);
1323     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1324       PetscFunctionReturn(PETSC_SUCCESS);
1325     }
1326   } else if (isbinary) {
1327     if (size == 1) {
1328       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1329       PetscCall(MatView(aij->A, viewer));
1330     } else {
1331       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1332     }
1333     PetscFunctionReturn(PETSC_SUCCESS);
1334   } else if (isascii && size == 1) {
1335     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1336     PetscCall(MatView(aij->A, viewer));
1337     PetscFunctionReturn(PETSC_SUCCESS);
1338   } else if (isdraw) {
1339     PetscDraw draw;
1340     PetscBool isnull;
1341     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1342     PetscCall(PetscDrawIsNull(draw, &isnull));
1343     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1344   }
1345 
1346   { /* assemble the entire matrix onto first processor */
1347     Mat A = NULL, Av;
1348     IS  isrow, iscol;
1349 
1350     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1351     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1352     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1353     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1354     /*  The commented code uses MatCreateSubMatrices instead */
1355     /*
1356     Mat *AA, A = NULL, Av;
1357     IS  isrow,iscol;
1358 
1359     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1360     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1361     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1362     if (rank == 0) {
1363        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1364        A    = AA[0];
1365        Av   = AA[0];
1366     }
1367     PetscCall(MatDestroySubMatrices(1,&AA));
1368 */
1369     PetscCall(ISDestroy(&iscol));
1370     PetscCall(ISDestroy(&isrow));
1371     /*
1372        Every process has to participate in the call that draws the matrix, since the
1373        graphics waits are synchronized across all processes that share the PetscDraw object
1374     */
1375     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1376     if (rank == 0) {
1377       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1378       PetscCall(MatView_SeqAIJ(Av, sviewer));
1379     }
1380     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1381     PetscCall(MatDestroy(&A));
1382   }
1383   PetscFunctionReturn(PETSC_SUCCESS);
1384 }
1385 
1386 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1387 {
1388   PetscBool isascii, isdraw, issocket, isbinary;
1389 
1390   PetscFunctionBegin;
1391   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1395   if (isascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1396   PetscFunctionReturn(PETSC_SUCCESS);
1397 }
1398 
1399 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1400 {
1401   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1402   Vec         bb1 = NULL;
1403   PetscBool   hasop;
1404 
1405   PetscFunctionBegin;
1406   if (flag == SOR_APPLY_UPPER) {
1407     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1408     PetscFunctionReturn(PETSC_SUCCESS);
1409   }
1410 
1411   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1412 
1413   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1414     if (flag & SOR_ZERO_INITIAL_GUESS) {
1415       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1416       its--;
1417     }
1418 
1419     while (its--) {
1420       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1421       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422 
1423       /* update rhs: bb1 = bb - B*x */
1424       PetscCall(VecScale(mat->lvec, -1.0));
1425       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1426 
1427       /* local sweep */
1428       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1429     }
1430   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1431     if (flag & SOR_ZERO_INITIAL_GUESS) {
1432       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1433       its--;
1434     }
1435     while (its--) {
1436       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1437       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438 
1439       /* update rhs: bb1 = bb - B*x */
1440       PetscCall(VecScale(mat->lvec, -1.0));
1441       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1442 
1443       /* local sweep */
1444       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1445     }
1446   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1447     if (flag & SOR_ZERO_INITIAL_GUESS) {
1448       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1449       its--;
1450     }
1451     while (its--) {
1452       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1453       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454 
1455       /* update rhs: bb1 = bb - B*x */
1456       PetscCall(VecScale(mat->lvec, -1.0));
1457       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1458 
1459       /* local sweep */
1460       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1461     }
1462   } else if (flag & SOR_EISENSTAT) {
1463     Vec xx1;
1464 
1465     PetscCall(VecDuplicate(bb, &xx1));
1466     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1467 
1468     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1469     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1470     if (!mat->diag) {
1471       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1472       PetscCall(MatGetDiagonal(matin, mat->diag));
1473     }
1474     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1475     if (hasop) {
1476       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1477     } else {
1478       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1479     }
1480     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1481 
1482     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1483 
1484     /* local sweep */
1485     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1486     PetscCall(VecAXPY(xx, 1.0, xx1));
1487     PetscCall(VecDestroy(&xx1));
1488   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1489 
1490   PetscCall(VecDestroy(&bb1));
1491 
1492   matin->factorerrortype = mat->A->factorerrortype;
1493   PetscFunctionReturn(PETSC_SUCCESS);
1494 }
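
/*
   A sketch of the local relaxation implemented above, with the per-process splitting
   A = [A_d | B_o], where A_d is the diagonal block and B_o acts on ghost values:

     repeat its times:
       x_ghost = VecScatter(x)                  gather off-process entries of x
       bb1     = bb - B_o * x_ghost             move the off-process coupling to the rhs
       x       = SOR(A_d, bb1, omega, lits)     lits local sweeps on the diagonal block

   Inter-process coupling enters only through the rhs update; the sweeps themselves are
   purely local, which is why a true parallel SOR is reported as unsupported above.
*/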
1495 
1496 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1497 {
1498   Mat             aA, aB, Aperm;
1499   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1500   PetscScalar    *aa, *ba;
1501   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1502   PetscSF         rowsf, sf;
1503   IS              parcolp = NULL;
1504   PetscBool       done;
1505 
1506   PetscFunctionBegin;
1507   PetscCall(MatGetLocalSize(A, &m, &n));
1508   PetscCall(ISGetIndices(rowp, &rwant));
1509   PetscCall(ISGetIndices(colp, &cwant));
1510   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1511 
1512   /* Invert row permutation to find out where my rows should go */
1513   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1514   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1515   PetscCall(PetscSFSetFromOptions(rowsf));
1516   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1517   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1518   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1519 
1520   /* Invert column permutation to find out where my columns should go */
1521   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1522   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1523   PetscCall(PetscSFSetFromOptions(sf));
1524   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1525   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1526   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1527   PetscCall(PetscSFDestroy(&sf));
1528 
1529   PetscCall(ISRestoreIndices(rowp, &rwant));
1530   PetscCall(ISRestoreIndices(colp, &cwant));
1531   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1532 
1533   /* Find out where my gcols should go */
1534   PetscCall(MatGetSize(aB, NULL, &ng));
1535   PetscCall(PetscMalloc1(ng, &gcdest));
1536   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1537   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1538   PetscCall(PetscSFSetFromOptions(sf));
1539   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1540   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1541   PetscCall(PetscSFDestroy(&sf));
1542 
1543   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1544   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1545   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1546   for (i = 0; i < m; i++) {
1547     PetscInt    row = rdest[i];
1548     PetscMPIInt rowner;
1549     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1550     for (j = ai[i]; j < ai[i + 1]; j++) {
1551       PetscInt    col = cdest[aj[j]];
1552       PetscMPIInt cowner;
1553       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1554       if (rowner == cowner) dnnz[i]++;
1555       else onnz[i]++;
1556     }
1557     for (j = bi[i]; j < bi[i + 1]; j++) {
1558       PetscInt    col = gcdest[bj[j]];
1559       PetscMPIInt cowner;
1560       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1561       if (rowner == cowner) dnnz[i]++;
1562       else onnz[i]++;
1563     }
1564   }
1565   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1566   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1567   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1568   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1569   PetscCall(PetscSFDestroy(&rowsf));
1570 
1571   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1572   PetscCall(MatSeqAIJGetArray(aA, &aa));
1573   PetscCall(MatSeqAIJGetArray(aB, &ba));
1574   for (i = 0; i < m; i++) {
1575     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1576     PetscInt  j0, rowlen;
1577     rowlen = ai[i + 1] - ai[i];
1578     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1579       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1580       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1581     }
1582     rowlen = bi[i + 1] - bi[i];
1583     for (j0 = j = 0; j < rowlen; j0 = j) {
1584       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1585       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1586     }
1587   }
1588   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1589   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1590   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1591   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1592   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1593   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1594   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1595   PetscCall(PetscFree3(work, rdest, cdest));
1596   PetscCall(PetscFree(gcdest));
1597   if (parcolp) PetscCall(ISDestroy(&colp));
1598   *B = Aperm;
1599   PetscFunctionReturn(PETSC_SUCCESS);
1600 }
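
/*
   A minimal calling sketch for the permutation above, assuming parallel row and column
   permutation index sets obtained elsewhere:

     IS  rowperm, colperm;   // built elsewhere, e.g. with ISCreateGeneral()
     Mat Aperm;
     PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
     ...
     PetscCall(MatDestroy(&Aperm));
     PetscCall(ISDestroy(&rowperm));
     PetscCall(ISDestroy(&colperm));
*/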
1601 
1602 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1603 {
1604   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1605 
1606   PetscFunctionBegin;
1607   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1608   if (ghosts) *ghosts = aij->garray;
1609   PetscFunctionReturn(PETSC_SUCCESS);
1610 }
1611 
1612 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1613 {
1614   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1615   Mat            A = mat->A, B = mat->B;
1616   PetscLogDouble isend[5], irecv[5];
1617 
1618   PetscFunctionBegin;
1619   info->block_size = 1.0;
1620   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1621 
1622   isend[0] = info->nz_used;
1623   isend[1] = info->nz_allocated;
1624   isend[2] = info->nz_unneeded;
1625   isend[3] = info->memory;
1626   isend[4] = info->mallocs;
1627 
1628   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1629 
1630   isend[0] += info->nz_used;
1631   isend[1] += info->nz_allocated;
1632   isend[2] += info->nz_unneeded;
1633   isend[3] += info->memory;
1634   isend[4] += info->mallocs;
1635   if (flag == MAT_LOCAL) {
1636     info->nz_used      = isend[0];
1637     info->nz_allocated = isend[1];
1638     info->nz_unneeded  = isend[2];
1639     info->memory       = isend[3];
1640     info->mallocs      = isend[4];
1641   } else if (flag == MAT_GLOBAL_MAX) {
1642     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1643 
1644     info->nz_used      = irecv[0];
1645     info->nz_allocated = irecv[1];
1646     info->nz_unneeded  = irecv[2];
1647     info->memory       = irecv[3];
1648     info->mallocs      = irecv[4];
1649   } else if (flag == MAT_GLOBAL_SUM) {
1650     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1651 
1652     info->nz_used      = irecv[0];
1653     info->nz_allocated = irecv[1];
1654     info->nz_unneeded  = irecv[2];
1655     info->memory       = irecv[3];
1656     info->mallocs      = irecv[4];
1657   }
1658   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1659   info->fill_ratio_needed = 0;
1660   info->factor_mallocs    = 0;
1661   PetscFunctionReturn(PETSC_SUCCESS);
1662 }
1663 
1664 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1665 {
1666   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1667 
1668   PetscFunctionBegin;
1669   switch (op) {
1670   case MAT_NEW_NONZERO_LOCATIONS:
1671   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1672   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1673   case MAT_KEEP_NONZERO_PATTERN:
1674   case MAT_NEW_NONZERO_LOCATION_ERR:
1675   case MAT_USE_INODES:
1676   case MAT_IGNORE_ZERO_ENTRIES:
1677   case MAT_FORM_EXPLICIT_TRANSPOSE:
1678     MatCheckPreallocated(A, 1);
1679     PetscCall(MatSetOption(a->A, op, flg));
1680     PetscCall(MatSetOption(a->B, op, flg));
1681     break;
1682   case MAT_ROW_ORIENTED:
1683     MatCheckPreallocated(A, 1);
1684     a->roworiented = flg;
1685 
1686     PetscCall(MatSetOption(a->A, op, flg));
1687     PetscCall(MatSetOption(a->B, op, flg));
1688     break;
1689   case MAT_IGNORE_OFF_PROC_ENTRIES:
1690     a->donotstash = flg;
1691     break;
1692   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1693   case MAT_SPD:
1694   case MAT_SYMMETRIC:
1695   case MAT_STRUCTURALLY_SYMMETRIC:
1696   case MAT_HERMITIAN:
1697   case MAT_SYMMETRY_ETERNAL:
1698   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1699   case MAT_SPD_ETERNAL:
1700     /* if the diagonal block is square it inherits some of the properties above */
1701     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1702     break;
1703   case MAT_SUBMAT_SINGLEIS:
1704     A->submat_singleis = flg;
1705     break;
1706   default:
1707     break;
1708   }
1709   PetscFunctionReturn(PETSC_SUCCESS);
1710 }
1711 
1712 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1713 {
1714   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1715   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1716   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1717   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1718   PetscInt    *cmap, *idx_p;
1719 
1720   PetscFunctionBegin;
1721   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1722   mat->getrowactive = PETSC_TRUE;
1723 
1724   if (!mat->rowvalues && (idx || v)) {
1725     /*
1726         allocate enough space to hold information from the longest row.
1727     */
1728     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1729     PetscInt    max = 1, tmp;
1730     for (i = 0; i < matin->rmap->n; i++) {
1731       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1732       if (max < tmp) max = tmp;
1733     }
1734     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1735   }
1736 
1737   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1738   lrow = row - rstart;
1739 
1740   pvA = &vworkA;
1741   pcA = &cworkA;
1742   pvB = &vworkB;
1743   pcB = &cworkB;
1744   if (!v) {
1745     pvA = NULL;
1746     pvB = NULL;
1747   }
1748   if (!idx) {
1749     pcA = NULL;
1750     if (!v) pcB = NULL;
1751   }
1752   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1753   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1754   nztot = nzA + nzB;
1755 
1756   cmap = mat->garray;
1757   if (v || idx) {
1758     if (nztot) {
1759       /* Sort by increasing column numbers, assuming A and B already sorted */
1760       PetscInt imark = -1;
1761       if (v) {
1762         *v = v_p = mat->rowvalues;
1763         for (i = 0; i < nzB; i++) {
1764           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1765           else break;
1766         }
1767         imark = i;
1768         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1769         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1770       }
1771       if (idx) {
1772         *idx = idx_p = mat->rowindices;
1773         if (imark > -1) {
1774           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1775         } else {
1776           for (i = 0; i < nzB; i++) {
1777             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1778             else break;
1779           }
1780           imark = i;
1781         }
1782         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1783         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1784       }
1785     } else {
1786       if (idx) *idx = NULL;
1787       if (v) *v = NULL;
1788     }
1789   }
1790   *nz = nztot;
1791   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1792   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1793   PetscFunctionReturn(PETSC_SUCCESS);
1794 }
1795 
1796 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1797 {
1798   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1799 
1800   PetscFunctionBegin;
1801   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1802   aij->getrowactive = PETSC_FALSE;
1803   PetscFunctionReturn(PETSC_SUCCESS);
1804 }
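
/*
   A minimal usage sketch for the MatGetRow()/MatRestoreRow() pair above, assuming an
   assembled MATMPIAIJ matrix A; only locally owned rows may be requested:

     PetscInt           rstart, rend, ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     for (PetscInt row = rstart; row < rend; row++) {
       PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
       // cols[]/vals[] list the row in increasing global column order
       PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
     }
*/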
1805 
1806 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1807 {
1808   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1809   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1810   PetscInt         i, j, cstart = mat->cmap->rstart;
1811   PetscReal        sum = 0.0;
1812   const MatScalar *v, *amata, *bmata;
1813 
1814   PetscFunctionBegin;
1815   if (aij->size == 1) {
1816     PetscCall(MatNorm(aij->A, type, norm));
1817   } else {
1818     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1819     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1820     if (type == NORM_FROBENIUS) {
1821       v = amata;
1822       for (i = 0; i < amat->nz; i++) {
1823         sum += PetscRealPart(PetscConj(*v) * (*v));
1824         v++;
1825       }
1826       v = bmata;
1827       for (i = 0; i < bmat->nz; i++) {
1828         sum += PetscRealPart(PetscConj(*v) * (*v));
1829         v++;
1830       }
1831       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1832       *norm = PetscSqrtReal(*norm);
1833       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1834     } else if (type == NORM_1) { /* max column norm */
1835       PetscReal *tmp;
1836       PetscInt  *jj, *garray = aij->garray;
1837       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1838       *norm = 0.0;
1839       v     = amata;
1840       jj    = amat->j;
1841       for (j = 0; j < amat->nz; j++) {
1842         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1843         v++;
1844       }
1845       v  = bmata;
1846       jj = bmat->j;
1847       for (j = 0; j < bmat->nz; j++) {
1848         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1849         v++;
1850       }
1851       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1852       for (j = 0; j < mat->cmap->N; j++) {
1853         if (tmp[j] > *norm) *norm = tmp[j];
1854       }
1855       PetscCall(PetscFree(tmp));
1856       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1857     } else if (type == NORM_INFINITY) { /* max row norm */
1858       PetscReal ntemp = 0.0;
1859       for (j = 0; j < aij->A->rmap->n; j++) {
1860         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1861         sum = 0.0;
1862         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1863           sum += PetscAbsScalar(*v);
1864           v++;
1865         }
1866         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1867         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1868           sum += PetscAbsScalar(*v);
1869           v++;
1870         }
1871         if (sum > ntemp) ntemp = sum;
1872       }
1873       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1874       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1875     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1876     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1877     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1878   }
1879   PetscFunctionReturn(PETSC_SUCCESS);
1880 }
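
/*
   The norms computed above, written for the global entries a_ij:

     NORM_FROBENIUS :  sqrt( sum_ij |a_ij|^2 )   local sums of squares combined with MPIU_SUM
     NORM_1         :  max_j sum_i |a_ij|        per-column absolute sums combined with MPIU_SUM
     NORM_INFINITY  :  max_i sum_j |a_ij|        per-row absolute sums combined with MPIU_MAX

   The 2-norm (largest singular value) is not supported for this type, as the error above states.
*/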
1881 
1882 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1883 {
1884   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1885   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1886   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1887   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1888   Mat              B, A_diag, *B_diag;
1889   const MatScalar *pbv, *bv;
1890 
1891   PetscFunctionBegin;
1892   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1893   ma = A->rmap->n;
1894   na = A->cmap->n;
1895   mb = a->B->rmap->n;
1896   nb = a->B->cmap->n;
1897   ai = Aloc->i;
1898   aj = Aloc->j;
1899   bi = Bloc->i;
1900   bj = Bloc->j;
1901   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1902     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1903     PetscSFNode         *oloc;
1904     PETSC_UNUSED PetscSF sf;
1905 
1906     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1907     /* compute d_nnz for preallocation */
1908     PetscCall(PetscArrayzero(d_nnz, na));
1909     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1910     /* compute local off-diagonal contributions */
1911     PetscCall(PetscArrayzero(g_nnz, nb));
1912     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1913     /* map those to global */
1914     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1915     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1916     PetscCall(PetscSFSetFromOptions(sf));
1917     PetscCall(PetscArrayzero(o_nnz, na));
1918     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1919     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1920     PetscCall(PetscSFDestroy(&sf));
1921 
1922     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1923     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1924     PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs));
1925     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1926     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1927     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1928   } else {
1929     B = *matout;
1930     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1931   }
1932 
1933   b           = (Mat_MPIAIJ *)B->data;
1934   A_diag      = a->A;
1935   B_diag      = &b->A;
1936   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1937   A_diag_ncol = A_diag->cmap->N;
1938   B_diag_ilen = sub_B_diag->ilen;
1939   B_diag_i    = sub_B_diag->i;
1940 
1941   /* Set ilen for diagonal of B */
1942   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1943 
1944   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1945   very quickly (=without using MatSetValues), because all writes are local. */
1946   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1947   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1948 
1949   /* copy over the B part */
1950   PetscCall(PetscMalloc1(bi[mb], &cols));
1951   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1952   pbv = bv;
1953   row = A->rmap->rstart;
1954   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1955   cols_tmp = cols;
1956   for (i = 0; i < mb; i++) {
1957     ncol = bi[i + 1] - bi[i];
1958     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1959     row++;
1960     if (pbv) pbv += ncol;
1961     if (cols_tmp) cols_tmp += ncol;
1962   }
1963   PetscCall(PetscFree(cols));
1964   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1965 
1966   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1967   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1968   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1969     *matout = B;
1970   } else {
1971     PetscCall(MatHeaderMerge(A, &B));
1972   }
1973   PetscFunctionReturn(PETSC_SUCCESS);
1974 }
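
/*
   A minimal usage sketch for the transpose above, assuming an assembled MATMPIAIJ matrix A:

     Mat At;
     PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));   // allocate and fill A^T
     // ... change numerical values of A, same nonzero pattern ...
     PetscCall(MatTranspose(A, MAT_REUSE_MATRIX, &At));     // refill the existing At
     PetscCall(MatDestroy(&At));

   Calling MatTranspose(A, MAT_INPLACE_MATRIX, &A) replaces A by its transpose through the
   MatHeaderMerge() path at the end of the routine.
*/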
1975 
1976 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1977 {
1978   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1979   Mat         a = aij->A, b = aij->B;
1980   PetscInt    s1, s2, s3;
1981 
1982   PetscFunctionBegin;
1983   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1984   if (rr) {
1985     PetscCall(VecGetLocalSize(rr, &s1));
1986     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1987     /* Overlap communication with computation. */
1988     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1989   }
1990   if (ll) {
1991     PetscCall(VecGetLocalSize(ll, &s1));
1992     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1993     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1994   }
1995   /* scale the diagonal block */
1996   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1997 
1998   if (rr) {
1999     /* Do a scatter end and then right scale the off-diagonal block */
2000     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2001     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2002   }
2003   PetscFunctionReturn(PETSC_SUCCESS);
2004 }
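
/*
   A minimal usage sketch for the scaling above, assuming ll matches mat's row layout and
   rr matches its column layout (the sizes checked above); either vector may be NULL:

     Vec l, r;
     PetscCall(MatCreateVecs(mat, &r, &l));    // r: column layout, l: row layout
     // ... fill l and r ...
     PetscCall(MatDiagonalScale(mat, l, r));   // mat <- diag(l) * mat * diag(r)
     PetscCall(VecDestroy(&l));
     PetscCall(VecDestroy(&r));

   Note how the routine starts the scatter of rr before scaling the diagonal block, so the
   communication needed for the off-diagonal columns overlaps the local computation.
*/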
2005 
2006 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2007 {
2008   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2009 
2010   PetscFunctionBegin;
2011   PetscCall(MatSetUnfactored(a->A));
2012   PetscFunctionReturn(PETSC_SUCCESS);
2013 }
2014 
2015 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2016 {
2017   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2018   Mat         a, b, c, d;
2019   PetscBool   flg;
2020 
2021   PetscFunctionBegin;
2022   a = matA->A;
2023   b = matA->B;
2024   c = matB->A;
2025   d = matB->B;
2026 
2027   PetscCall(MatEqual(a, c, &flg));
2028   if (flg) PetscCall(MatEqual(b, d, &flg));
2029   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2030   PetscFunctionReturn(PETSC_SUCCESS);
2031 }
2032 
2033 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2034 {
2035   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2036   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2037 
2038   PetscFunctionBegin;
2039   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2040   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2041     /* Because of the column compression in the off-process part of the matrix a->B,
2042        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2043        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2044        could be provided by first uncompressing the a->B matrices and then copying the
2045        submatrices */
2046     PetscCall(MatCopy_Basic(A, B, str));
2047   } else {
2048     PetscCall(MatCopy(a->A, b->A, str));
2049     PetscCall(MatCopy(a->B, b->B, str));
2050   }
2051   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2052   PetscFunctionReturn(PETSC_SUCCESS);
2053 }
2054 
2055 /*
2056    Computes the number of nonzeros per row needed for preallocation when X and Y
2057    have different nonzero structure.
2058 */
2059 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2060 {
2061   PetscInt i, j, k, nzx, nzy;
2062 
2063   PetscFunctionBegin;
2064   /* Set the number of nonzeros in the new matrix */
2065   for (i = 0; i < m; i++) {
2066     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2067     nzx    = xi[i + 1] - xi[i];
2068     nzy    = yi[i + 1] - yi[i];
2069     nnz[i] = 0;
2070     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2071       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2072       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2073       nnz[i]++;
2074     }
2075     for (; k < nzy; k++) nnz[i]++;
2076   }
2077   PetscFunctionReturn(PETSC_SUCCESS);
2078 }
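
/*
   A small worked example of the row merge above (made-up indices): if in some row X has
   global columns {0, 3, 7} and Y has global columns {1, 3, 5}, the merged union is
   {0, 1, 3, 5, 7}, so nnz for that row is 5; the shared column 3 is counted only once.
*/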
2079 
2080 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2081 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2082 {
2083   PetscInt    m = Y->rmap->N;
2084   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2085   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2086 
2087   PetscFunctionBegin;
2088   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2089   PetscFunctionReturn(PETSC_SUCCESS);
2090 }
2091 
2092 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2093 {
2094   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2095 
2096   PetscFunctionBegin;
2097   if (str == SAME_NONZERO_PATTERN) {
2098     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2099     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2100   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2101     PetscCall(MatAXPY_Basic(Y, a, X, str));
2102   } else {
2103     Mat       B;
2104     PetscInt *nnz_d, *nnz_o;
2105 
2106     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2107     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2108     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2109     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2110     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2111     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2112     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2113     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2114     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2115     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2116     PetscCall(MatHeaderMerge(Y, &B));
2117     PetscCall(PetscFree(nnz_d));
2118     PetscCall(PetscFree(nnz_o));
2119   }
2120   PetscFunctionReturn(PETSC_SUCCESS);
2121 }
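
/*
   A minimal usage sketch for the update above (Y <- a*X + Y), assuming X and Y are assembled
   MATMPIAIJ matrices with identical layouts:

     PetscCall(MatAXPY(Y, 2.0, X, DIFFERENT_NONZERO_PATTERN));

   With SAME_NONZERO_PATTERN the two local parts are updated directly; SUBSET_NONZERO_PATTERN
   reuses Y's existing pattern via MatAXPY_Basic(); DIFFERENT_NONZERO_PATTERN builds a new
   matrix with the merged preallocation computed above and header-merges it into Y.
*/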
2122 
2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2124 
2125 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2126 {
2127   PetscFunctionBegin;
2128   if (PetscDefined(USE_COMPLEX)) {
2129     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2130 
2131     PetscCall(MatConjugate_SeqAIJ(aij->A));
2132     PetscCall(MatConjugate_SeqAIJ(aij->B));
2133   }
2134   PetscFunctionReturn(PETSC_SUCCESS);
2135 }
2136 
2137 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2140 
2141   PetscFunctionBegin;
2142   PetscCall(MatRealPart(a->A));
2143   PetscCall(MatRealPart(a->B));
2144   PetscFunctionReturn(PETSC_SUCCESS);
2145 }
2146 
2147 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2148 {
2149   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2150 
2151   PetscFunctionBegin;
2152   PetscCall(MatImaginaryPart(a->A));
2153   PetscCall(MatImaginaryPart(a->B));
2154   PetscFunctionReturn(PETSC_SUCCESS);
2155 }
2156 
2157 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2158 {
2159   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2160   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2161   PetscScalar       *vv;
2162   Vec                vB, vA;
2163   const PetscScalar *va, *vb;
2164 
2165   PetscFunctionBegin;
2166   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2167   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2168 
2169   PetscCall(VecGetArrayRead(vA, &va));
2170   if (idx) {
2171     for (i = 0; i < m; i++) {
2172       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2173     }
2174   }
2175 
2176   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2177   PetscCall(PetscMalloc1(m, &idxb));
2178   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2179 
2180   PetscCall(VecGetArrayWrite(v, &vv));
2181   PetscCall(VecGetArrayRead(vB, &vb));
2182   for (i = 0; i < m; i++) {
2183     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2184       vv[i] = vb[i];
2185       if (idx) idx[i] = a->garray[idxb[i]];
2186     } else {
2187       vv[i] = va[i];
2188       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2189     }
2190   }
2191   PetscCall(VecRestoreArrayWrite(v, &vv));
2192   PetscCall(VecRestoreArrayRead(vA, &va));
2193   PetscCall(VecRestoreArrayRead(vB, &vb));
2194   PetscCall(PetscFree(idxb));
2195   PetscCall(VecDestroy(&vA));
2196   PetscCall(VecDestroy(&vB));
2197   PetscFunctionReturn(PETSC_SUCCESS);
2198 }
2199 
2200 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2201 {
2202   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2203   Vec         vB, vA;
2204 
2205   PetscFunctionBegin;
2206   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2207   PetscCall(MatGetRowSumAbs(a->A, vA));
2208   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2209   PetscCall(MatGetRowSumAbs(a->B, vB));
2210   PetscCall(VecAXPY(vA, 1.0, vB));
2211   PetscCall(VecDestroy(&vB));
2212   PetscCall(VecCopy(vA, v));
2213   PetscCall(VecDestroy(&vA));
2214   PetscFunctionReturn(PETSC_SUCCESS);
2215 }
2216 
2217 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2218 {
2219   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2220   PetscInt           m = A->rmap->n, n = A->cmap->n;
2221   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2222   PetscInt          *cmap = mat->garray;
2223   PetscInt          *diagIdx, *offdiagIdx;
2224   Vec                diagV, offdiagV;
2225   PetscScalar       *a, *diagA, *offdiagA;
2226   const PetscScalar *ba, *bav;
2227   PetscInt           r, j, col, ncols, *bi, *bj;
2228   Mat                B = mat->B;
2229   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2230 
2231   PetscFunctionBegin;
2232   /* When one process holds the entire matrix and the other processes have no entries */
2233   if (A->cmap->N == n) {
2234     PetscCall(VecGetArrayWrite(v, &diagA));
2235     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2236     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2237     PetscCall(VecDestroy(&diagV));
2238     PetscCall(VecRestoreArrayWrite(v, &diagA));
2239     PetscFunctionReturn(PETSC_SUCCESS);
2240   } else if (n == 0) {
2241     if (m) {
2242       PetscCall(VecGetArrayWrite(v, &a));
2243       for (r = 0; r < m; r++) {
2244         a[r] = 0.0;
2245         if (idx) idx[r] = -1;
2246       }
2247       PetscCall(VecRestoreArrayWrite(v, &a));
2248     }
2249     PetscFunctionReturn(PETSC_SUCCESS);
2250   }
2251 
2252   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2253   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2254   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2255   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2256 
2257   /* Get offdiagIdx[] for implicit 0.0 */
2258   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2259   ba = bav;
2260   bi = b->i;
2261   bj = b->j;
2262   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2263   for (r = 0; r < m; r++) {
2264     ncols = bi[r + 1] - bi[r];
2265     if (ncols == A->cmap->N - n) { /* Brow is dense */
2266       offdiagA[r]   = *ba;
2267       offdiagIdx[r] = cmap[0];
2268     } else { /* Brow is sparse, so the row has an implicit 0.0 and its minimum magnitude is at most 0.0 */
2269       offdiagA[r] = 0.0;
2270 
2271       /* Find first hole in the cmap */
2272       for (j = 0; j < ncols; j++) {
2273         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2274         if (col > j && j < cstart) {
2275           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2276           break;
2277         } else if (col > j + n && j >= cstart) {
2278           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2279           break;
2280         }
2281       }
2282       if (j == ncols && ncols < A->cmap->N - n) {
2283         /* a hole is outside compressed Bcols */
2284         if (ncols == 0) {
2285           if (cstart) {
2286             offdiagIdx[r] = 0;
2287           } else offdiagIdx[r] = cend;
2288         } else { /* ncols > 0 */
2289           offdiagIdx[r] = cmap[ncols - 1] + 1;
2290           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2291         }
2292       }
2293     }
2294 
2295     for (j = 0; j < ncols; j++) {
2296       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2297         offdiagA[r]   = *ba;
2298         offdiagIdx[r] = cmap[*bj];
2299       }
2300       ba++;
2301       bj++;
2302     }
2303   }
2304 
2305   PetscCall(VecGetArrayWrite(v, &a));
2306   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2307   for (r = 0; r < m; ++r) {
2308     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2309       a[r] = diagA[r];
2310       if (idx) idx[r] = cstart + diagIdx[r];
2311     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2312       a[r] = diagA[r];
2313       if (idx) {
2314         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2315           idx[r] = cstart + diagIdx[r];
2316         } else idx[r] = offdiagIdx[r];
2317       }
2318     } else {
2319       a[r] = offdiagA[r];
2320       if (idx) idx[r] = offdiagIdx[r];
2321     }
2322   }
2323   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2324   PetscCall(VecRestoreArrayWrite(v, &a));
2325   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2326   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2327   PetscCall(VecDestroy(&diagV));
2328   PetscCall(VecDestroy(&offdiagV));
2329   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2330   PetscFunctionReturn(PETSC_SUCCESS);
2331 }
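
/*
   A worked example (made-up sizes) of the "first hole in the cmap" search above: with
   N = 8 global columns and local columns [3,5) (cstart = 3, n = 2), the possible
   off-diagonal global columns, in order, are 0,1,2,5,6,7, i.e. the j-th one is j for
   j < cstart and j + n otherwise. If a row stores off-diagonal entries at global columns
   {0, 2, 6}, then at j = 1 we find cmap[bj[1]] = 2 > 1 with 1 < cstart, so global column 1
   is the first implicit 0.0 in that row. The same search is used by MatGetRowMin_MPIAIJ()
   and MatGetRowMax_MPIAIJ() below.
*/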
2332 
2333 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2334 {
2335   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2336   PetscInt           m = A->rmap->n, n = A->cmap->n;
2337   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2338   PetscInt          *cmap = mat->garray;
2339   PetscInt          *diagIdx, *offdiagIdx;
2340   Vec                diagV, offdiagV;
2341   PetscScalar       *a, *diagA, *offdiagA;
2342   const PetscScalar *ba, *bav;
2343   PetscInt           r, j, col, ncols, *bi, *bj;
2344   Mat                B = mat->B;
2345   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2346 
2347   PetscFunctionBegin;
2348   /* When one process holds the entire matrix and the other processes have no entries */
2349   if (A->cmap->N == n) {
2350     PetscCall(VecGetArrayWrite(v, &diagA));
2351     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2352     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2353     PetscCall(VecDestroy(&diagV));
2354     PetscCall(VecRestoreArrayWrite(v, &diagA));
2355     PetscFunctionReturn(PETSC_SUCCESS);
2356   } else if (n == 0) {
2357     if (m) {
2358       PetscCall(VecGetArrayWrite(v, &a));
2359       for (r = 0; r < m; r++) {
2360         a[r] = PETSC_MAX_REAL;
2361         if (idx) idx[r] = -1;
2362       }
2363       PetscCall(VecRestoreArrayWrite(v, &a));
2364     }
2365     PetscFunctionReturn(PETSC_SUCCESS);
2366   }
2367 
2368   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2369   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2370   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2371   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2372 
2373   /* Get offdiagIdx[] for implicit 0.0 */
2374   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2375   ba = bav;
2376   bi = b->i;
2377   bj = b->j;
2378   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2379   for (r = 0; r < m; r++) {
2380     ncols = bi[r + 1] - bi[r];
2381     if (ncols == A->cmap->N - n) { /* Brow is dense */
2382       offdiagA[r]   = *ba;
2383       offdiagIdx[r] = cmap[0];
2384     } else { /* Brow is sparse, so the row has an implicit 0.0 and its minimum is 0.0 or lower */
2385       offdiagA[r] = 0.0;
2386 
2387       /* Find first hole in the cmap */
2388       for (j = 0; j < ncols; j++) {
2389         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2390         if (col > j && j < cstart) {
2391           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2392           break;
2393         } else if (col > j + n && j >= cstart) {
2394           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2395           break;
2396         }
2397       }
2398       if (j == ncols && ncols < A->cmap->N - n) {
2399         /* a hole is outside compressed Bcols */
2400         if (ncols == 0) {
2401           if (cstart) {
2402             offdiagIdx[r] = 0;
2403           } else offdiagIdx[r] = cend;
2404         } else { /* ncols > 0 */
2405           offdiagIdx[r] = cmap[ncols - 1] + 1;
2406           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2407         }
2408       }
2409     }
2410 
2411     for (j = 0; j < ncols; j++) {
2412       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2413         offdiagA[r]   = *ba;
2414         offdiagIdx[r] = cmap[*bj];
2415       }
2416       ba++;
2417       bj++;
2418     }
2419   }
2420 
2421   PetscCall(VecGetArrayWrite(v, &a));
2422   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2423   for (r = 0; r < m; ++r) {
2424     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2425       a[r] = diagA[r];
2426       if (idx) idx[r] = cstart + diagIdx[r];
2427     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2428       a[r] = diagA[r];
2429       if (idx) {
2430         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2431           idx[r] = cstart + diagIdx[r];
2432         } else idx[r] = offdiagIdx[r];
2433       }
2434     } else {
2435       a[r] = offdiagA[r];
2436       if (idx) idx[r] = offdiagIdx[r];
2437     }
2438   }
2439   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2440   PetscCall(VecRestoreArrayWrite(v, &a));
2441   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2442   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2443   PetscCall(VecDestroy(&diagV));
2444   PetscCall(VecDestroy(&offdiagV));
2445   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2446   PetscFunctionReturn(PETSC_SUCCESS);
2447 }
2448 
2449 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2450 {
2451   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2452   PetscInt           m = A->rmap->n, n = A->cmap->n;
2453   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2454   PetscInt          *cmap = mat->garray;
2455   PetscInt          *diagIdx, *offdiagIdx;
2456   Vec                diagV, offdiagV;
2457   PetscScalar       *a, *diagA, *offdiagA;
2458   const PetscScalar *ba, *bav;
2459   PetscInt           r, j, col, ncols, *bi, *bj;
2460   Mat                B = mat->B;
2461   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2462 
2463   PetscFunctionBegin;
2464   /* When one process holds the entire matrix and the other processes have no entries */
2465   if (A->cmap->N == n) {
2466     PetscCall(VecGetArrayWrite(v, &diagA));
2467     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2468     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2469     PetscCall(VecDestroy(&diagV));
2470     PetscCall(VecRestoreArrayWrite(v, &diagA));
2471     PetscFunctionReturn(PETSC_SUCCESS);
2472   } else if (n == 0) {
2473     if (m) {
2474       PetscCall(VecGetArrayWrite(v, &a));
2475       for (r = 0; r < m; r++) {
2476         a[r] = PETSC_MIN_REAL;
2477         if (idx) idx[r] = -1;
2478       }
2479       PetscCall(VecRestoreArrayWrite(v, &a));
2480     }
2481     PetscFunctionReturn(PETSC_SUCCESS);
2482   }
2483 
2484   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2485   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2486   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2487   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2488 
2489   /* Get offdiagIdx[] for implicit 0.0 */
2490   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2491   ba = bav;
2492   bi = b->i;
2493   bj = b->j;
2494   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2495   for (r = 0; r < m; r++) {
2496     ncols = bi[r + 1] - bi[r];
2497     if (ncols == A->cmap->N - n) { /* Brow is dense */
2498       offdiagA[r]   = *ba;
2499       offdiagIdx[r] = cmap[0];
2500     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2501       offdiagA[r] = 0.0;
2502 
2503       /* Find first hole in the cmap */
2504       for (j = 0; j < ncols; j++) {
2505         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2506         if (col > j && j < cstart) {
2507           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2508           break;
2509         } else if (col > j + n && j >= cstart) {
2510           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2511           break;
2512         }
2513       }
2514       if (j == ncols && ncols < A->cmap->N - n) {
2515         /* a hole is outside compressed Bcols */
2516         if (ncols == 0) {
2517           if (cstart) {
2518             offdiagIdx[r] = 0;
2519           } else offdiagIdx[r] = cend;
2520         } else { /* ncols > 0 */
2521           offdiagIdx[r] = cmap[ncols - 1] + 1;
2522           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2523         }
2524       }
2525     }
2526 
2527     for (j = 0; j < ncols; j++) {
2528       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2529         offdiagA[r]   = *ba;
2530         offdiagIdx[r] = cmap[*bj];
2531       }
2532       ba++;
2533       bj++;
2534     }
2535   }
2536 
2537   PetscCall(VecGetArrayWrite(v, &a));
2538   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2539   for (r = 0; r < m; ++r) {
2540     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2541       a[r] = diagA[r];
2542       if (idx) idx[r] = cstart + diagIdx[r];
2543     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2544       a[r] = diagA[r];
2545       if (idx) {
2546         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2547           idx[r] = cstart + diagIdx[r];
2548         } else idx[r] = offdiagIdx[r];
2549       }
2550     } else {
2551       a[r] = offdiagA[r];
2552       if (idx) idx[r] = offdiagIdx[r];
2553     }
2554   }
2555   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2556   PetscCall(VecRestoreArrayWrite(v, &a));
2557   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2558   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2559   PetscCall(VecDestroy(&diagV));
2560   PetscCall(VecDestroy(&offdiagV));
2561   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2562   PetscFunctionReturn(PETSC_SUCCESS);
2563 }
2564 
2565 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2566 {
2567   Mat *dummy;
2568 
2569   PetscFunctionBegin;
2570   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2571   *newmat = *dummy;
2572   PetscCall(PetscFree(dummy));
2573   PetscFunctionReturn(PETSC_SUCCESS);
2574 }
2575 
2576 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2577 {
2578   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2579 
2580   PetscFunctionBegin;
2581   PetscCall(MatInvertBlockDiagonal(a->A, values));
2582   A->factorerrortype = a->A->factorerrortype;
2583   PetscFunctionReturn(PETSC_SUCCESS);
2584 }
2585 
2586 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2587 {
2588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2589 
2590   PetscFunctionBegin;
2591   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2592   PetscCall(MatSetRandom(aij->A, rctx));
2593   if (x->assembled) {
2594     PetscCall(MatSetRandom(aij->B, rctx));
2595   } else {
2596     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2597   }
2598   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2599   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2600   PetscFunctionReturn(PETSC_SUCCESS);
2601 }
2602 
2603 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2604 {
2605   PetscFunctionBegin;
2606   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2607   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2608   PetscFunctionReturn(PETSC_SUCCESS);
2609 }
2610 
2611 /*@
2612   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2613 
2614   Not Collective
2615 
2616   Input Parameter:
2617 . A - the matrix
2618 
2619   Output Parameter:
2620 . nz - the number of nonzeros
2621 
2622   Level: advanced
2623 
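  Example Usage:
  A minimal sketch (assuming `A` is an assembled `MATMPIAIJ`) of querying the locally stored nonzero count:
.vb
  PetscCount nz;

  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
.ve
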
2624 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2625 @*/
2626 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2627 {
2628   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2629   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2630   PetscBool   isaij;
2631 
2632   PetscFunctionBegin;
2633   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2634   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2635   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2636   PetscFunctionReturn(PETSC_SUCCESS);
2637 }
2638 
2639 /*@
2640   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in `MatIncreaseOverlap()`
2641 
2642   Collective
2643 
2644   Input Parameters:
2645 + A  - the matrix
2646 - sc - `PETSC_TRUE` to use the scalable algorithm (by default the scalable algorithm is not used)
2647 
2648   Level: advanced
2649 
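  Example Usage:
  A minimal sketch (assuming `A` is a `MATMPIAIJ`); the same effect can be obtained at run time with the option `-mat_increase_overlap_scalable`:
.vb
  PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
.ve
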
2650 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2651 @*/
2652 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2653 {
2654   PetscFunctionBegin;
2655   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2656   PetscFunctionReturn(PETSC_SUCCESS);
2657 }
2658 
2659 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject)
2660 {
2661   PetscBool sc = PETSC_FALSE, flg;
2662 
2663   PetscFunctionBegin;
2664   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2665   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2666   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2667   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2668   PetscOptionsHeadEnd();
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2673 {
2674   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2675   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2676 
2677   PetscFunctionBegin;
2678   if (!Y->preallocated) {
2679     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2680   } else if (!aij->nz) { /* It does not matter if the diagonal entries of Y only partially lie in maij->A. We just need an estimated preallocation. */
2681     PetscInt nonew = aij->nonew;
2682     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2683     aij->nonew = nonew;
2684   }
2685   PetscCall(MatShift_Basic(Y, a));
2686   PetscFunctionReturn(PETSC_SUCCESS);
2687 }
2688 
2689 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2690 {
2691   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2692 
2693   PetscFunctionBegin;
2694   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2695   PetscCall(MatMissingDiagonal(a->A, missing, d));
2696   if (d) {
2697     PetscInt rstart;
2698     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2699     *d += rstart;
2700   }
2701   PetscFunctionReturn(PETSC_SUCCESS);
2702 }
2703 
2704 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2705 {
2706   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2707 
2708   PetscFunctionBegin;
2709   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
2713 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2714 {
2715   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2716 
2717   PetscFunctionBegin;
2718   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2720   PetscFunctionReturn(PETSC_SUCCESS);
2721 }
2722 
2723 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2724                                        MatGetRow_MPIAIJ,
2725                                        MatRestoreRow_MPIAIJ,
2726                                        MatMult_MPIAIJ,
2727                                        /* 4*/ MatMultAdd_MPIAIJ,
2728                                        MatMultTranspose_MPIAIJ,
2729                                        MatMultTransposeAdd_MPIAIJ,
2730                                        NULL,
2731                                        NULL,
2732                                        NULL,
2733                                        /*10*/ NULL,
2734                                        NULL,
2735                                        NULL,
2736                                        MatSOR_MPIAIJ,
2737                                        MatTranspose_MPIAIJ,
2738                                        /*15*/ MatGetInfo_MPIAIJ,
2739                                        MatEqual_MPIAIJ,
2740                                        MatGetDiagonal_MPIAIJ,
2741                                        MatDiagonalScale_MPIAIJ,
2742                                        MatNorm_MPIAIJ,
2743                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2744                                        MatAssemblyEnd_MPIAIJ,
2745                                        MatSetOption_MPIAIJ,
2746                                        MatZeroEntries_MPIAIJ,
2747                                        /*24*/ MatZeroRows_MPIAIJ,
2748                                        NULL,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        /*29*/ MatSetUp_MPI_Hash,
2753                                        NULL,
2754                                        NULL,
2755                                        MatGetDiagonalBlock_MPIAIJ,
2756                                        NULL,
2757                                        /*34*/ MatDuplicate_MPIAIJ,
2758                                        NULL,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        /*39*/ MatAXPY_MPIAIJ,
2763                                        MatCreateSubMatrices_MPIAIJ,
2764                                        MatIncreaseOverlap_MPIAIJ,
2765                                        MatGetValues_MPIAIJ,
2766                                        MatCopy_MPIAIJ,
2767                                        /*44*/ MatGetRowMax_MPIAIJ,
2768                                        MatScale_MPIAIJ,
2769                                        MatShift_MPIAIJ,
2770                                        MatDiagonalSet_MPIAIJ,
2771                                        MatZeroRowsColumns_MPIAIJ,
2772                                        /*49*/ MatSetRandom_MPIAIJ,
2773                                        MatGetRowIJ_MPIAIJ,
2774                                        MatRestoreRowIJ_MPIAIJ,
2775                                        NULL,
2776                                        NULL,
2777                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2778                                        NULL,
2779                                        MatSetUnfactored_MPIAIJ,
2780                                        MatPermute_MPIAIJ,
2781                                        NULL,
2782                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2783                                        MatDestroy_MPIAIJ,
2784                                        MatView_MPIAIJ,
2785                                        NULL,
2786                                        NULL,
2787                                        /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2788                                        NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        MatGetRowMaxAbs_MPIAIJ,
2792                                        /*69*/ MatGetRowMinAbs_MPIAIJ,
2793                                        NULL,
2794                                        NULL,
2795                                        MatFDColoringApply_AIJ,
2796                                        MatSetFromOptions_MPIAIJ,
2797                                        MatFindZeroDiagonals_MPIAIJ,
2798                                        /*75*/ NULL,
2799                                        NULL,
2800                                        NULL,
2801                                        MatLoad_MPIAIJ,
2802                                        NULL,
2803                                        /*80*/ NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        /*83*/ NULL,
2807                                        NULL,
2808                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2809                                        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2810                                        NULL,
2811                                        NULL,
2812                                        /*89*/ MatBindToCPU_MPIAIJ,
2813                                        MatProductSetFromOptions_MPIAIJ,
2814                                        NULL,
2815                                        NULL,
2816                                        MatConjugate_MPIAIJ,
2817                                        /*94*/ NULL,
2818                                        MatSetValuesRow_MPIAIJ,
2819                                        MatRealPart_MPIAIJ,
2820                                        MatImaginaryPart_MPIAIJ,
2821                                        NULL,
2822                                        /*99*/ NULL,
2823                                        NULL,
2824                                        NULL,
2825                                        MatGetRowMin_MPIAIJ,
2826                                        NULL,
2827                                        /*104*/ MatMissingDiagonal_MPIAIJ,
2828                                        MatGetSeqNonzeroStructure_MPIAIJ,
2829                                        NULL,
2830                                        MatGetGhosts_MPIAIJ,
2831                                        NULL,
2832                                        /*109*/ NULL,
2833                                        MatMultDiagonalBlock_MPIAIJ,
2834                                        NULL,
2835                                        NULL,
2836                                        NULL,
2837                                        /*114*/ MatGetMultiProcBlock_MPIAIJ,
2838                                        MatFindNonzeroRows_MPIAIJ,
2839                                        MatGetColumnReductions_MPIAIJ,
2840                                        MatInvertBlockDiagonal_MPIAIJ,
2841                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2842                                        /*119*/ MatCreateSubMatricesMPI_MPIAIJ,
2843                                        NULL,
2844                                        NULL,
2845                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2846                                        NULL,
2847                                        /*124*/ NULL,
2848                                        NULL,
2849                                        NULL,
2850                                        MatSetBlockSizes_MPIAIJ,
2851                                        NULL,
2852                                        /*129*/ MatFDColoringSetUp_MPIXAIJ,
2853                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2854                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2855                                        NULL,
2856                                        NULL,
2857                                        /*134*/ NULL,
2858                                        MatCreateGraph_Simple_AIJ,
2859                                        NULL,
2860                                        MatEliminateZeros_MPIAIJ,
2861                                        MatGetRowSumAbs_MPIAIJ,
2862                                        /*139*/ NULL,
2863                                        NULL,
2864                                        NULL,
2865                                        MatCopyHashToXAIJ_MPI_Hash,
2866                                        MatGetCurrentMemType_MPIAIJ,
2867                                        NULL};
2868 
2869 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2870 {
2871   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2872 
2873   PetscFunctionBegin;
2874   PetscCall(MatStoreValues(aij->A));
2875   PetscCall(MatStoreValues(aij->B));
2876   PetscFunctionReturn(PETSC_SUCCESS);
2877 }
2878 
2879 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2880 {
2881   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2882 
2883   PetscFunctionBegin;
2884   PetscCall(MatRetrieveValues(aij->A));
2885   PetscCall(MatRetrieveValues(aij->B));
2886   PetscFunctionReturn(PETSC_SUCCESS);
2887 }
2888 
2889 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2890 {
2891   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2892   PetscMPIInt size;
2893 
2894   PetscFunctionBegin;
2895   if (B->hash_active) {
2896     B->ops[0]      = b->cops;
2897     B->hash_active = PETSC_FALSE;
2898   }
2899   PetscCall(PetscLayoutSetUp(B->rmap));
2900   PetscCall(PetscLayoutSetUp(B->cmap));
2901 
2902 #if defined(PETSC_USE_CTABLE)
2903   PetscCall(PetscHMapIDestroy(&b->colmap));
2904 #else
2905   PetscCall(PetscFree(b->colmap));
2906 #endif
2907   PetscCall(PetscFree(b->garray));
2908   PetscCall(VecDestroy(&b->lvec));
2909   PetscCall(VecScatterDestroy(&b->Mvctx));
2910 
2911   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2912 
2913   MatSeqXAIJGetOptions_Private(b->B);
2914   PetscCall(MatDestroy(&b->B));
2915   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2916   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2917   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2918   PetscCall(MatSetType(b->B, MATSEQAIJ));
2919   MatSeqXAIJRestoreOptions_Private(b->B);
2920 
2921   MatSeqXAIJGetOptions_Private(b->A);
2922   PetscCall(MatDestroy(&b->A));
2923   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2924   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2925   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2926   PetscCall(MatSetType(b->A, MATSEQAIJ));
2927   MatSeqXAIJRestoreOptions_Private(b->A);
2928 
2929   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2930   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2931   B->preallocated  = PETSC_TRUE;
2932   B->was_assembled = PETSC_FALSE;
2933   B->assembled     = PETSC_FALSE;
2934   PetscFunctionReturn(PETSC_SUCCESS);
2935 }
2936 
2937 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2938 {
2939   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2940   PetscBool   ondiagreset, offdiagreset, memoryreset;
2941 
2942   PetscFunctionBegin;
2943   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2944   PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()");
2945   if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS);
2946 
2947   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset));
2948   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset));
2949   memoryreset = (PetscBool)(ondiagreset || offdiagreset);
2950   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B)));
2951   if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS);
2952 
2953   PetscCall(PetscLayoutSetUp(B->rmap));
2954   PetscCall(PetscLayoutSetUp(B->cmap));
2955   PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled");
2956   PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2957   PetscCall(VecScatterDestroy(&b->Mvctx));
2958 
2959   B->preallocated  = PETSC_TRUE;
2960   B->was_assembled = PETSC_FALSE;
2961   B->assembled     = PETSC_FALSE;
2962   /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
2963   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2964   PetscFunctionReturn(PETSC_SUCCESS);
2965 }
2966 
2967 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2968 {
2969   Mat         mat;
2970   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2971 
2972   PetscFunctionBegin;
2973   *newmat = NULL;
2974   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2975   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2976   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2977   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2978   a = (Mat_MPIAIJ *)mat->data;
2979 
2980   mat->factortype = matin->factortype;
2981   mat->assembled  = matin->assembled;
2982   mat->insertmode = NOT_SET_VALUES;
2983 
2984   a->size         = oldmat->size;
2985   a->rank         = oldmat->rank;
2986   a->donotstash   = oldmat->donotstash;
2987   a->roworiented  = oldmat->roworiented;
2988   a->rowindices   = NULL;
2989   a->rowvalues    = NULL;
2990   a->getrowactive = PETSC_FALSE;
2991 
2992   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2993   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2994   if (matin->hash_active) {
2995     PetscCall(MatSetUp(mat));
2996   } else {
2997     mat->preallocated = matin->preallocated;
2998     if (oldmat->colmap) {
2999 #if defined(PETSC_USE_CTABLE)
3000       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3001 #else
3002       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3003       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3004 #endif
3005     } else a->colmap = NULL;
3006     if (oldmat->garray) {
3007       PetscInt len;
3008       len = oldmat->B->cmap->n;
3009       PetscCall(PetscMalloc1(len + 1, &a->garray));
3010       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3011     } else a->garray = NULL;
3012 
3013     /* It may happen that MatDuplicate() is called with a non-assembled matrix;
3014       in fact, MatDuplicate() only requires the matrix to be preallocated.
3015       This may happen, for example, inside DMCreateMatrix_Shell() */
3016     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3017     if (oldmat->Mvctx) {
3018       a->Mvctx = oldmat->Mvctx;
3019       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3020     }
3021     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3022     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3023   }
3024   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3025   *newmat = mat;
3026   PetscFunctionReturn(PETSC_SUCCESS);
3027 }
3028 
3029 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3030 {
3031   PetscBool isbinary, ishdf5;
3032 
3033   PetscFunctionBegin;
3034   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3035   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3036   /* force binary viewer to load .info file if it has not yet done so */
3037   PetscCall(PetscViewerSetUp(viewer));
3038   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3039   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3040   if (isbinary) {
3041     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3042   } else if (ishdf5) {
3043 #if defined(PETSC_HAVE_HDF5)
3044     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3045 #else
3046     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3047 #endif
3048   } else {
3049     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3050   }
3051   PetscFunctionReturn(PETSC_SUCCESS);
3052 }
3053 
3054 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3055 {
3056   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3057   PetscInt    *rowidxs, *colidxs;
3058   PetscScalar *matvals;
3059 
3060   PetscFunctionBegin;
3061   PetscCall(PetscViewerSetUp(viewer));
3062 
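  /* On-disk layout, as consumed by the reads below: a 4-entry PetscInt header
     (MAT_FILE_CLASSID, global rows M, global columns N, total nonzero count nz),
     followed by the per-row nonzero counts, the column indices, and the values,
     each array read collectively with PetscViewerBinaryReadAll() */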
3063   /* read in matrix header */
3064   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3065   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3066   M  = header[1];
3067   N  = header[2];
3068   nz = header[3];
3069   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3070   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3071   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3072 
3073   /* set block sizes from the viewer's .info file */
3074   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3075   /* set global sizes if not set already */
3076   if (mat->rmap->N < 0) mat->rmap->N = M;
3077   if (mat->cmap->N < 0) mat->cmap->N = N;
3078   PetscCall(PetscLayoutSetUp(mat->rmap));
3079   PetscCall(PetscLayoutSetUp(mat->cmap));
3080 
3081   /* check if the matrix sizes are correct */
3082   PetscCall(MatGetSize(mat, &rows, &cols));
3083   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3084 
3085   /* read in row lengths and build row indices */
3086   PetscCall(MatGetLocalSize(mat, &m, NULL));
3087   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3088   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3089   rowidxs[0] = 0;
3090   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3091   if (nz != PETSC_INT_MAX) {
3092     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3093     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3094   }
3095 
3096   /* read in column indices and matrix values */
3097   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3098   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3099   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3100   /* store matrix indices and values */
3101   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3102   PetscCall(PetscFree(rowidxs));
3103   PetscCall(PetscFree2(colidxs, matvals));
3104   PetscFunctionReturn(PETSC_SUCCESS);
3105 }
3106 
3107 /* Not scalable because of ISAllGather() unless getting all columns. */
3108 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3109 {
3110   IS          iscol_local;
3111   PetscBool   isstride;
3112   PetscMPIInt gisstride = 0;
3113 
3114   PetscFunctionBegin;
3115   /* check if we are grabbing all columns */
3116   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3117 
3118   if (isstride) {
3119     PetscInt start, len, mstart, mlen;
3120     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3121     PetscCall(ISGetLocalSize(iscol, &len));
3122     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3123     if (mstart == start && mlen - mstart == len) gisstride = 1;
3124   }
3125 
3126   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3127   if (gisstride) {
3128     PetscInt N;
3129     PetscCall(MatGetSize(mat, NULL, &N));
3130     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3131     PetscCall(ISSetIdentity(iscol_local));
3132     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3133   } else {
3134     PetscInt cbs;
3135     PetscCall(ISGetBlockSize(iscol, &cbs));
3136     PetscCall(ISAllGather(iscol, &iscol_local));
3137     PetscCall(ISSetBlockSize(iscol_local, cbs));
3138   }
3139 
3140   *isseq = iscol_local;
3141   PetscFunctionReturn(PETSC_SUCCESS);
3142 }
3143 
3144 /*
3145  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3146  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3147 
3148  Input Parameters:
3149 +   mat - matrix
3150 +   isrow - parallel row index set; its local indices are a subset of the local rows of `mat`,
3151            i.e., mat->rstart <= isrow[i] < mat->rend
3152 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3153            i.e., mat->cstart <= iscol[i] < mat->cend
3154 
3155  Output Parameters:
3156 +   isrow_d - sequential row index set for retrieving mat->A
3157 .   iscol_d - sequential column index set for retrieving mat->A
3158 .   iscol_o - sequential column index set for retrieving mat->B
3159 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3160  */
3161 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3162 {
3163   Vec             x, cmap;
3164   const PetscInt *is_idx;
3165   PetscScalar    *xarray, *cmaparray;
3166   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3167   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3168   Mat             B    = a->B;
3169   Vec             lvec = a->lvec, lcmap;
3170   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3171   MPI_Comm        comm;
3172   VecScatter      Mvctx = a->Mvctx;
3173 
3174   PetscFunctionBegin;
3175   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3176   PetscCall(ISGetLocalSize(iscol, &ncols));
3177 
3178   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3179   PetscCall(MatCreateVecs(mat, &x, NULL));
3180   PetscCall(VecSet(x, -1.0));
3181   PetscCall(VecDuplicate(x, &cmap));
3182   PetscCall(VecSet(cmap, -1.0));
3183 
3184   /* Get start indices */
3185   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3186   isstart -= ncols;
3187   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3188 
3189   PetscCall(ISGetIndices(iscol, &is_idx));
3190   PetscCall(VecGetArray(x, &xarray));
3191   PetscCall(VecGetArray(cmap, &cmaparray));
3192   PetscCall(PetscMalloc1(ncols, &idx));
3193   for (i = 0; i < ncols; i++) {
3194     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3195     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3196     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3197   }
3198   PetscCall(VecRestoreArray(x, &xarray));
3199   PetscCall(VecRestoreArray(cmap, &cmaparray));
3200   PetscCall(ISRestoreIndices(iscol, &is_idx));
3201 
3202   /* Get iscol_d */
3203   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3204   PetscCall(ISGetBlockSize(iscol, &i));
3205   PetscCall(ISSetBlockSize(*iscol_d, i));
3206 
3207   /* Get isrow_d */
3208   PetscCall(ISGetLocalSize(isrow, &m));
3209   rstart = mat->rmap->rstart;
3210   PetscCall(PetscMalloc1(m, &idx));
3211   PetscCall(ISGetIndices(isrow, &is_idx));
3212   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3213   PetscCall(ISRestoreIndices(isrow, &is_idx));
3214 
3215   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3216   PetscCall(ISGetBlockSize(isrow, &i));
3217   PetscCall(ISSetBlockSize(*isrow_d, i));
3218 
3219   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3220   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3221   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3222 
3223   PetscCall(VecDuplicate(lvec, &lcmap));
3224 
3225   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3226   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3227 
3228   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3229   /* off-process column indices */
3230   count = 0;
3231   PetscCall(PetscMalloc1(Bn, &idx));
3232   PetscCall(PetscMalloc1(Bn, &cmap1));
3233 
3234   PetscCall(VecGetArray(lvec, &xarray));
3235   PetscCall(VecGetArray(lcmap, &cmaparray));
3236   for (i = 0; i < Bn; i++) {
3237     if (PetscRealPart(xarray[i]) > -1.0) {
3238       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3239       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3240       count++;
3241     }
3242   }
3243   PetscCall(VecRestoreArray(lvec, &xarray));
3244   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3245 
3246   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3247   /* cannot ensure iscol_o has same blocksize as iscol! */
3248 
3249   PetscCall(PetscFree(idx));
3250   *garray = cmap1;
3251 
3252   PetscCall(VecDestroy(&x));
3253   PetscCall(VecDestroy(&cmap));
3254   PetscCall(VecDestroy(&lcmap));
3255   PetscFunctionReturn(PETSC_SUCCESS);
3256 }
3257 
3258 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3259 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3260 {
3261   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3262   Mat         M = NULL;
3263   MPI_Comm    comm;
3264   IS          iscol_d, isrow_d, iscol_o;
3265   Mat         Asub = NULL, Bsub = NULL;
3266   PetscInt    n, count, M_size, N_size;
3267 
3268   PetscFunctionBegin;
3269   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3270 
3271   if (call == MAT_REUSE_MATRIX) {
3272     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3273     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3274     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3275 
3276     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3277     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3278 
3279     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3280     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3281 
3282     /* Update diagonal and off-diagonal portions of submat */
3283     asub = (Mat_MPIAIJ *)(*submat)->data;
3284     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3285     PetscCall(ISGetLocalSize(iscol_o, &n));
3286     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3287     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3288     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3289 
3290   } else { /* call == MAT_INITIAL_MATRIX) */
3291     PetscInt *garray, *garray_compact;
3292     PetscInt  BsubN;
3293 
3294     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3295     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3296 
3297     /* Create local submatrices Asub and Bsub */
3298     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3299     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3300 
3301     // Compact garray so it is not of size Bn
3302     PetscCall(ISGetSize(iscol_o, &count));
3303     PetscCall(PetscMalloc1(count, &garray_compact));
3304     PetscCall(PetscArraycpy(garray_compact, garray, count));
3305 
3306     /* Create submatrix M */
3307     PetscCall(ISGetSize(isrow, &M_size));
3308     PetscCall(ISGetSize(iscol, &N_size));
3309     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));
3310 
3311     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3312     asub = (Mat_MPIAIJ *)M->data;
3313 
3314     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3315     n = asub->B->cmap->N;
3316     if (BsubN > n) {
3317       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3318       const PetscInt *idx;
3319       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3320       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3321 
3322       PetscCall(PetscMalloc1(n, &idx_new));
3323       j = 0;
3324       PetscCall(ISGetIndices(iscol_o, &idx));
3325       for (i = 0; i < n; i++) {
3326         if (j >= BsubN) break;
3327         while (subgarray[i] > garray[j]) j++;
3328 
3329         PetscCheck(subgarray[i] == garray[j], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3330         idx_new[i] = idx[j++];
3331       }
3332       PetscCall(ISRestoreIndices(iscol_o, &idx));
3333 
3334       PetscCall(ISDestroy(&iscol_o));
3335       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3336 
3337     } else PetscCheck(BsubN >= n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3338 
3339     PetscCall(PetscFree(garray));
3340     *submat = M;
3341 
3342     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3343     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3344     PetscCall(ISDestroy(&isrow_d));
3345 
3346     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3347     PetscCall(ISDestroy(&iscol_d));
3348 
3349     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3350     PetscCall(ISDestroy(&iscol_o));
3351   }
3352   PetscFunctionReturn(PETSC_SUCCESS);
3353 }
3354 
3355 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3356 {
3357   IS        iscol_local = NULL, isrow_d;
3358   PetscInt  csize;
3359   PetscInt  n, i, j, start, end;
3360   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3361   MPI_Comm  comm;
3362 
3363   PetscFunctionBegin;
3364   /* If isrow has same processor distribution as mat,
3365      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3366   if (call == MAT_REUSE_MATRIX) {
3367     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3368     if (isrow_d) {
3369       sameRowDist  = PETSC_TRUE;
3370       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3371     } else {
3372       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3373       if (iscol_local) {
3374         sameRowDist  = PETSC_TRUE;
3375         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3376       }
3377     }
3378   } else {
3379     /* Check if isrow has same processor distribution as mat */
3380     sameDist[0] = PETSC_FALSE;
3381     PetscCall(ISGetLocalSize(isrow, &n));
3382     if (!n) {
3383       sameDist[0] = PETSC_TRUE;
3384     } else {
3385       PetscCall(ISGetMinMax(isrow, &i, &j));
3386       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3387       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3388     }
3389 
3390     /* Check if iscol has same processor distribution as mat */
3391     sameDist[1] = PETSC_FALSE;
3392     PetscCall(ISGetLocalSize(iscol, &n));
3393     if (!n) {
3394       sameDist[1] = PETSC_TRUE;
3395     } else {
3396       PetscCall(ISGetMinMax(iscol, &i, &j));
3397       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3398       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3399     }
3400 
3401     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3402     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPI_C_BOOL, MPI_LAND, comm));
3403     sameRowDist = tsameDist[0];
3404   }
3405 
3406   if (sameRowDist) {
3407     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3408       /* isrow and iscol have same processor distribution as mat */
3409       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3410       PetscFunctionReturn(PETSC_SUCCESS);
3411     } else { /* sameRowDist */
3412       /* isrow has same processor distribution as mat */
3413       if (call == MAT_INITIAL_MATRIX) {
3414         PetscBool sorted;
3415         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3416         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3417         PetscCall(ISGetSize(iscol, &i));
3418         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3419 
3420         PetscCall(ISSorted(iscol_local, &sorted));
3421         if (sorted) {
3422           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3423           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3424           PetscFunctionReturn(PETSC_SUCCESS);
3425         }
3426       } else { /* call == MAT_REUSE_MATRIX */
3427         IS iscol_sub;
3428         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3429         if (iscol_sub) {
3430           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3431           PetscFunctionReturn(PETSC_SUCCESS);
3432         }
3433       }
3434     }
3435   }
3436 
3437   /* General case: iscol -> iscol_local which has global size of iscol */
3438   if (call == MAT_REUSE_MATRIX) {
3439     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3440     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3441   } else {
3442     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3443   }
3444 
3445   PetscCall(ISGetLocalSize(iscol, &csize));
3446   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3447 
3448   if (call == MAT_INITIAL_MATRIX) {
3449     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3450     PetscCall(ISDestroy(&iscol_local));
3451   }
3452   PetscFunctionReturn(PETSC_SUCCESS);
3453 }
3454 
3455 /*@C
3456   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3457   and "off-diagonal" part of the matrix in CSR format.
3458 
3459   Collective
3460 
3461   Input Parameters:
3462 + comm   - MPI communicator
3463 . M      - the global row size
3464 . N      - the global column size
3465 . A      - "diagonal" portion of matrix
3466 . B      - "off-diagonal" portion of the matrix; if `garray` is `NULL`, `B` should use global column ids and have `N` columns, otherwise `B` should use local column ids and have as many columns as there are entries in `garray`
3467 - garray - either `NULL` or the global index of `B` columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter.
3468 
3469   Output Parameter:
3470 . mat - the matrix, with input `A` as its local diagonal matrix
3471 
3472   Level: advanced
3473 
3474   Notes:
3475   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3476 
3477   `A` and `B` become part of the output `mat`. The user must not use `A` or `B` afterwards.
3478 
3479   If `garray` is `NULL`, `B` will be compacted to use local indices, so `B`'s sparsity pattern (nonzerostate) will change. If `B` is a device matrix, its copy on the device must also be
3480   updated. We do so by increasing `B`'s nonzerostate. When `B` is next used on the device, device matrix types should detect this change (see the internal routines `MatSeqAIJCUSPARSECopyToGPU()` and
3481   `MatAssemblyEnd_SeqAIJKokkos()`) and simply destroy and then recreate the device copy of `B`. It is not optimal, but it is easy to implement and less hacky than the alternatives. To avoid this
3482   overhead, try to compute `garray` yourself; see the algorithms in the private function `MatSetUpMultiply_MPIAIJ()`.
3483 
3484   The `NULL`-ness of `garray` does not need to be collective; that is, `garray` can be `NULL` on some processes and non-`NULL` on others.
3485 
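  Example Usage:
  A rough sketch with `garray == NULL` (assuming `Aloc` and `Bloc` are assembled `MATSEQAIJ` matrices with the same number of rows, and `Bloc` uses global column ids and has `N` columns):
.vb
  Mat C;

  PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M, N, Aloc, Bloc, NULL, &C));
  /* Aloc and Bloc now belong to C and must not be used or destroyed by the caller */
.ve
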
3486 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3487 @*/
3488 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
3489 {
3490   PetscInt    m, n;
3491   MatType     mpi_mat_type;
3492   Mat_MPIAIJ *mpiaij;
3493   Mat         C;
3494 
3495   PetscFunctionBegin;
3496   PetscCall(MatCreate(comm, &C));
3497   PetscCall(MatGetSize(A, &m, &n));
3498   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3499   PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3500 
3501   PetscCall(MatSetSizes(C, m, n, M, N));
3502   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3503   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3504   PetscCall(MatSetType(C, mpi_mat_type));
3505   if (!garray) {
3506     const PetscScalar *ba;
3507 
3508     B->nonzerostate++;
3509     PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */
3510     PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
3511   }
3512 
3513   PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs));
3514   PetscCall(PetscLayoutSetUp(C->rmap));
3515   PetscCall(PetscLayoutSetUp(C->cmap));
3516 
3517   mpiaij              = (Mat_MPIAIJ *)C->data;
3518   mpiaij->A           = A;
3519   mpiaij->B           = B;
3520   mpiaij->garray      = garray;
3521   C->preallocated     = PETSC_TRUE;
3522   C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */
3523 
3524   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3525   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3526   /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and
3527    also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced
3528    */
3529   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3530   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3531   PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3532   *mat = C;
3533   PetscFunctionReturn(PETSC_SUCCESS);
3534 }
3535 
3536 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3537 
3538 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3539 {
3540   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3541   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3542   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3543   Mat             M, Msub, B = a->B;
3544   MatScalar      *aa;
3545   Mat_SeqAIJ     *aij;
3546   PetscInt       *garray = a->garray, *colsub, Ncols;
3547   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3548   IS              iscol_sub, iscmap;
3549   const PetscInt *is_idx, *cmap;
3550   PetscBool       allcolumns = PETSC_FALSE;
3551   MPI_Comm        comm;
3552 
3553   PetscFunctionBegin;
3554   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3555   if (call == MAT_REUSE_MATRIX) {
3556     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3557     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3558     PetscCall(ISGetLocalSize(iscol_sub, &count));
3559 
3560     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3561     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3562 
3563     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3564     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3565 
3566     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3567 
3568   } else { /* call == MAT_INITIAL_MATRIX) */
3569     PetscBool flg;
3570 
3571     PetscCall(ISGetLocalSize(iscol, &n));
3572     PetscCall(ISGetSize(iscol, &Ncols));
3573 
3574     /* (1) iscol -> nonscalable iscol_local */
3575     /* Check for special case: each processor gets entire matrix columns */
3576     PetscCall(ISIdentity(iscol_local, &flg));
3577     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3578     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3579     if (allcolumns) {
3580       iscol_sub = iscol_local;
3581       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3582       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3583 
3584     } else {
3585       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3586       PetscInt *idx, *cmap1, k;
3587       PetscCall(PetscMalloc1(Ncols, &idx));
3588       PetscCall(PetscMalloc1(Ncols, &cmap1));
3589       PetscCall(ISGetIndices(iscol_local, &is_idx));
3590       count = 0;
3591       k     = 0;
3592       for (i = 0; i < Ncols; i++) {
3593         j = is_idx[i];
3594         if (j >= cstart && j < cend) {
3595           /* diagonal part of mat */
3596           idx[count]     = j;
3597           cmap1[count++] = i; /* column index in submat */
3598         } else if (Bn) {
3599           /* off-diagonal part of mat */
3600           if (j == garray[k]) {
3601             idx[count]     = j;
3602             cmap1[count++] = i; /* column index in submat */
3603           } else if (j > garray[k]) {
3604             while (j > garray[k] && k < Bn - 1) k++;
3605             if (j == garray[k]) {
3606               idx[count]     = j;
3607               cmap1[count++] = i; /* column index in submat */
3608             }
3609           }
3610         }
3611       }
3612       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3613 
3614       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3615       PetscCall(ISGetBlockSize(iscol, &cbs));
3616       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3617 
3618       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3619     }
3620 
3621     /* (3) Create sequential Msub */
3622     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3623   }
3624 
3625   PetscCall(ISGetLocalSize(iscol_sub, &count));
3626   aij = (Mat_SeqAIJ *)Msub->data;
3627   ii  = aij->i;
3628   PetscCall(ISGetIndices(iscmap, &cmap));
3629 
3630   /*
3631       m - number of local rows
3632       Ncols - number of columns (same on all processors)
3633       rstart - first row in new global matrix generated
3634   */
3635   PetscCall(MatGetSize(Msub, &m, NULL));
3636 
3637   if (call == MAT_INITIAL_MATRIX) {
3638     /* (4) Create parallel newmat */
3639     PetscMPIInt rank, size;
3640     PetscInt    csize;
3641 
3642     PetscCallMPI(MPI_Comm_size(comm, &size));
3643     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3644 
3645     /*
3646         Determine the number of non-zeros in the diagonal and off-diagonal
3647         portions of the matrix in order to do correct preallocation
3648     */
3649 
3650     /* first get start and end of "diagonal" columns */
3651     PetscCall(ISGetLocalSize(iscol, &csize));
3652     if (csize == PETSC_DECIDE) {
3653       PetscCall(ISGetSize(isrow, &mglobal));
3654       if (mglobal == Ncols) { /* square matrix */
3655         nlocal = m;
3656       } else {
3657         nlocal = Ncols / size + ((Ncols % size) > rank);
3658       }
3659     } else {
3660       nlocal = csize;
3661     }
3662     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3663     rstart = rend - nlocal;
3664     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3665 
3666     /* next, compute all the lengths */
3667     jj = aij->j;
3668     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3669     olens = dlens + m;
3670     for (i = 0; i < m; i++) {
3671       jend = ii[i + 1] - ii[i];
3672       olen = 0;
3673       dlen = 0;
3674       for (j = 0; j < jend; j++) {
3675         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3676         else dlen++;
3677         jj++;
3678       }
3679       olens[i] = olen;
3680       dlens[i] = dlen;
3681     }
3682 
3683     PetscCall(ISGetBlockSize(isrow, &bs));
3684     PetscCall(ISGetBlockSize(iscol, &cbs));
3685 
3686     PetscCall(MatCreate(comm, &M));
3687     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3688     PetscCall(MatSetBlockSizes(M, bs, cbs));
3689     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3690     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3691     PetscCall(PetscFree(dlens));
3692 
3693   } else { /* call == MAT_REUSE_MATRIX */
3694     M = *newmat;
3695     PetscCall(MatGetLocalSize(M, &i, NULL));
3696     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3697     PetscCall(MatZeroEntries(M));
3698     /*
3699          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3700        rather than the slower MatSetValues().
3701     */
3702     M->was_assembled = PETSC_TRUE;
3703     M->assembled     = PETSC_FALSE;
3704   }
3705 
3706   /* (5) Set values of Msub to *newmat */
3707   PetscCall(PetscMalloc1(count, &colsub));
3708   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3709 
3710   jj = aij->j;
3711   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3712   for (i = 0; i < m; i++) {
3713     row = rstart + i;
3714     nz  = ii[i + 1] - ii[i];
3715     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3716     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3717     jj += nz;
3718     aa += nz;
3719   }
3720   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3721   PetscCall(ISRestoreIndices(iscmap, &cmap));
3722 
3723   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3724   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3725 
3726   PetscCall(PetscFree(colsub));
3727 
3728   /* save Msub, iscol_sub and iscmap used in processor for next request */
3729   if (call == MAT_INITIAL_MATRIX) {
3730     *newmat = M;
3731     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3732     PetscCall(MatDestroy(&Msub));
3733 
3734     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3735     PetscCall(ISDestroy(&iscol_sub));
3736 
3737     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3738     PetscCall(ISDestroy(&iscmap));
3739 
3740     if (iscol_local) {
3741       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3742       PetscCall(ISDestroy(&iscol_local));
3743     }
3744   }
3745   PetscFunctionReturn(PETSC_SUCCESS);
3746 }
3747 
3748 /*
3749     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3750   on each process, and then the final result formed by concatenating the local matrices.
3751   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3752 
3753   This requires a sequential iscol with all indices.
3754 */
3755 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3756 {
3757   PetscMPIInt rank, size;
3758   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3759   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3760   Mat         M, Mreuse;
3761   MatScalar  *aa, *vwork;
3762   MPI_Comm    comm;
3763   Mat_SeqAIJ *aij;
3764   PetscBool   colflag, allcolumns = PETSC_FALSE;
3765 
3766   PetscFunctionBegin;
3767   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3768   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3769   PetscCallMPI(MPI_Comm_size(comm, &size));
3770 
3771   /* Check for special case: each processor gets entire matrix columns */
3772   PetscCall(ISIdentity(iscol, &colflag));
3773   PetscCall(ISGetLocalSize(iscol, &n));
3774   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3775   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3776 
3777   if (call == MAT_REUSE_MATRIX) {
3778     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3779     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3780     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3781   } else {
3782     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3783   }
3784 
3785   /*
3786       m - number of local rows
3787       n - number of columns (same on all processors)
3788       rstart - first row in new global matrix generated
3789   */
3790   PetscCall(MatGetSize(Mreuse, &m, &n));
3791   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3792   if (call == MAT_INITIAL_MATRIX) {
3793     aij = (Mat_SeqAIJ *)Mreuse->data;
3794     ii  = aij->i;
3795     jj  = aij->j;
3796 
3797     /*
3798         Determine the number of non-zeros in the diagonal and off-diagonal
3799         portions of the matrix in order to do correct preallocation
3800     */
3801 
3802     /* first get start and end of "diagonal" columns */
3803     if (csize == PETSC_DECIDE) {
3804       PetscCall(ISGetSize(isrow, &mglobal));
3805       if (mglobal == n) { /* square matrix */
3806         nlocal = m;
3807       } else {
3808         nlocal = n / size + ((n % size) > rank);
3809       }
3810     } else {
3811       nlocal = csize;
3812     }
3813     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3814     rstart = rend - nlocal;
3815     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3816 
3817     /* next, compute all the lengths */
3818     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3819     olens = dlens + m;
3820     for (i = 0; i < m; i++) {
3821       jend = ii[i + 1] - ii[i];
3822       olen = 0;
3823       dlen = 0;
3824       for (j = 0; j < jend; j++) {
3825         if (*jj < rstart || *jj >= rend) olen++;
3826         else dlen++;
3827         jj++;
3828       }
3829       olens[i] = olen;
3830       dlens[i] = dlen;
3831     }
3832     PetscCall(MatCreate(comm, &M));
3833     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3834     PetscCall(MatSetBlockSizes(M, bs, cbs));
3835     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3836     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3837     PetscCall(PetscFree(dlens));
3838   } else {
3839     PetscInt ml, nl;
3840 
3841     M = *newmat;
3842     PetscCall(MatGetLocalSize(M, &ml, &nl));
3843     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3844     PetscCall(MatZeroEntries(M));
3845     /*
3846          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3847        rather than the slower MatSetValues().
3848     */
3849     M->was_assembled = PETSC_TRUE;
3850     M->assembled     = PETSC_FALSE;
3851   }
3852   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3853   aij = (Mat_SeqAIJ *)Mreuse->data;
3854   ii  = aij->i;
3855   jj  = aij->j;
3856 
3857   /* trigger copy to CPU if needed */
3858   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3859   for (i = 0; i < m; i++) {
3860     row   = rstart + i;
3861     nz    = ii[i + 1] - ii[i];
3862     cwork = jj;
3863     jj    = PetscSafePointerPlusOffset(jj, nz);
3864     vwork = aa;
3865     aa    = PetscSafePointerPlusOffset(aa, nz);
3866     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3867   }
3868   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3869 
3870   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3871   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3872   *newmat = M;
3873 
3874   /* save submatrix used in processor for next request */
3875   if (call == MAT_INITIAL_MATRIX) {
3876     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3877     PetscCall(MatDestroy(&Mreuse));
3878   }
3879   PetscFunctionReturn(PETSC_SUCCESS);
3880 }
3881 
3882 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3883 {
3884   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3885   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3886   const PetscInt *JJ;
3887   PetscBool       nooffprocentries;
3888   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3889 
3890   PetscFunctionBegin;
3891   PetscCall(PetscLayoutSetUp(B->rmap));
3892   PetscCall(PetscLayoutSetUp(B->cmap));
3893   m       = B->rmap->n;
3894   cstart  = B->cmap->rstart;
3895   cend    = B->cmap->rend;
3896   rstart  = B->rmap->rstart;
3897   irstart = Ii[0];
3898 
3899   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3900 
3901   if (PetscDefined(USE_DEBUG)) {
3902     for (i = 0; i < m; i++) {
3903       nnz = Ii[i + 1] - Ii[i];
3904       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3905       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3906       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3907       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3908     }
3909   }
3910 
3911   for (i = 0; i < m; i++) {
3912     nnz     = Ii[i + 1] - Ii[i];
3913     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3914     nnz_max = PetscMax(nnz_max, nnz);
3915     d       = 0;
3916     for (j = 0; j < nnz; j++) {
3917       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3918     }
3919     d_nnz[i] = d;
3920     o_nnz[i] = nnz - d;
3921   }
3922   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3923   PetscCall(PetscFree2(d_nnz, o_nnz));
3924 
3925   for (i = 0; i < m; i++) {
3926     ii = i + rstart;
3927     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3928   }
3929   nooffprocentries    = B->nooffprocentries;
3930   B->nooffprocentries = PETSC_TRUE;
3931   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3932   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3933   B->nooffprocentries = nooffprocentries;
3934 
3935   /* count number of entries below block diagonal */
3936   PetscCall(PetscFree(Aij->ld));
3937   PetscCall(PetscCalloc1(m, &ld));
3938   Aij->ld = ld;
3939   for (i = 0; i < m; i++) {
3940     nnz = Ii[i + 1] - Ii[i];
3941     j   = 0;
3942     while (j < nnz && J[j] < cstart) j++;
3943     ld[i] = j;
3944     if (J) J += nnz;
3945   }
3946 
3947   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3948   PetscFunctionReturn(PETSC_SUCCESS);
3949 }
3950 
3951 /*@
3952   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3953   (the default parallel PETSc format).
3954 
3955   Collective
3956 
3957   Input Parameters:
3958 + B - the matrix
3959 . i - the indices into `j` for the start of each local row (indices start with zero)
3960 . j - the column indices for each local row (indices start with zero)
3961 - v - optional values in the matrix
3962 
3963   Level: developer
3964 
3965   Notes:
3966   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3967   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3968   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3969 
3970   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3971 
3972   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3973 
3974   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3975 
3976   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3977   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3978 
3979   The format used for the sparse matrix input is equivalent to a
3980   row-major ordering, i.e., for the following matrix the expected input data is
3981   as shown
3982 .vb
3983         1 0 0
3984         2 0 3     P0
3985        -------
3986         4 5 6     P1
3987 
3988      Process0 [P0] rows_owned=[0,1]
3989         i =  {0,1,3}  [size = nrow+1  = 2+1]
3990         j =  {0,0,2}  [size = 3]
3991         v =  {1,2,3}  [size = 3]
3992 
3993      Process1 [P1] rows_owned=[2]
3994         i =  {0,3}    [size = nrow+1  = 1+1]
3995         j =  {0,1,2}  [size = 3]
3996         v =  {4,5,6}  [size = 3]
3997 .ve
3998 
3999 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4000           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4001 @*/
4002 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4003 {
4004   PetscFunctionBegin;
4005   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4006   PetscFunctionReturn(PETSC_SUCCESS);
4007 }
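
/*
  Usage sketch (illustrative only, not a PETSc example or test): preallocating and filling the
  3x3 CSR example from the manual page above with MatMPIAIJSetPreallocationCSR() on exactly two
  MPI ranks. The variable names and literal data are hypothetical; the calls are assumed to be
  made inside a function that returns PetscErrorCode.

    Mat         A;
    PetscMPIInt rank;
    PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2}; // rank 0 owns rows 0 and 1
    PetscScalar v0[] = {1.0, 2.0, 3.0};
    PetscInt    i1[] = {0, 3}, j1[] = {0, 1, 2};    // rank 1 owns row 2
    PetscScalar v1[] = {4.0, 5.0, 6.0};

    PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, rank ? 1 : 2, PETSC_DECIDE, 3, 3));
    PetscCall(MatSetType(A, MATMPIAIJ));
    if (rank == 0) PetscCall(MatMPIAIJSetPreallocationCSR(A, i0, j0, v0));
    else PetscCall(MatMPIAIJSetPreallocationCSR(A, i1, j1, v1));
    // the matrix is assembled on return and ready for MatMult() etc.
    PetscCall(MatDestroy(&A));
*/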
4008 
4009 /*@
4010   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4011   (the default parallel PETSc format).  For good matrix assembly performance
4012   the user should preallocate the matrix storage by setting the parameters
4013   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4014 
4015   Collective
4016 
4017   Input Parameters:
4018 + B     - the matrix
4019 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4020            (same value is used for all local rows)
4021 . d_nnz - array containing the number of nonzeros in the various rows of the
4022            DIAGONAL portion of the local submatrix (possibly different for each row)
4023            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4024            The size of this array is equal to the number of local rows, i.e 'm'.
4025            For matrices that will be factored, you must leave room for (and set)
4026            the diagonal entry even if it is zero.
4027 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4028            submatrix (same value is used for all local rows).
4029 - o_nnz - array containing the number of nonzeros in the various rows of the
4030            OFF-DIAGONAL portion of the local submatrix (possibly different for
4031            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4032            structure. The size of this array is equal to the number
4033            of local rows, i.e 'm'.
4034 
4035   Example Usage:
4036   Consider the following 8x8 matrix with 34 non-zero values, that is
4037   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4038   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4039   as follows
4040 
4041 .vb
4042             1  2  0  |  0  3  0  |  0  4
4043     Proc0   0  5  6  |  7  0  0  |  8  0
4044             9  0 10  | 11  0  0  | 12  0
4045     -------------------------------------
4046            13  0 14  | 15 16 17  |  0  0
4047     Proc1   0 18  0  | 19 20 21  |  0  0
4048             0  0  0  | 22 23  0  | 24  0
4049     -------------------------------------
4050     Proc2  25 26 27  |  0  0 28  | 29  0
4051            30  0  0  | 31 32 33  |  0 34
4052 .ve
4053 
4054   This can be represented as a collection of submatrices as
4055 .vb
4056       A B C
4057       D E F
4058       G H I
4059 .ve
4060 
4061   Where the submatrices A,B,C are owned by proc0, D,E,F are
4062   owned by proc1, G,H,I are owned by proc2.
4063 
4064   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4065   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4066   The 'M','N' parameters are 8,8, and have the same values on all procs.
4067 
4068   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4069   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4070   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4071   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4072   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4073   matrix, and [DF] as another `MATSEQAIJ` matrix.
4074 
4075   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4076   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4077   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4078   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4079   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4080   In this case, the values of `d_nz`, `o_nz` are
4081 .vb
4082      proc0  dnz = 2, o_nz = 2
4083      proc1  dnz = 3, o_nz = 2
4084      proc2  dnz = 1, o_nz = 4
4085 .ve
4086   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4087   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4088   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4089   34 values.
4090 
4091   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4092   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4093   In the above case the values for `d_nnz`, `o_nnz` are
4094 .vb
4095      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4096      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4097      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4098 .ve
4099   Here the space allocated is the sum of all the above values, i.e., 34, and
4100   hence the preallocation is perfect.
4101 
4102   Level: intermediate
4103 
4104   Notes:
4105   If the *_nnz parameter is given then the *_nz parameter is ignored
4106 
4107   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4108   storage.  The stored row and column indices begin with zero.
4109   See [Sparse Matrices](sec_matsparse) for details.
4110 
4111   The parallel matrix is partitioned such that the first m0 rows belong to
4112   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4113   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4114 
4115   The DIAGONAL portion of the local submatrix of a processor can be defined
4116   as the submatrix obtained by extracting the part corresponding to
4117   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4118   first row that belongs to the processor, r2 is the last row belonging to
4119   this processor, and c1-c2 is the range of indices of the local part of a
4120   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4121   common case of a square matrix, the row and column ranges are the same and
4122   the DIAGONAL part is also square. The remaining portion of the local
4123   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4124 
4125   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4126 
4127   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4128   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4129   You can also run with the option `-info` and look for messages with the string
4130   malloc in them to see if additional memory allocation was needed.
4131 
4132 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4133           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4134 @*/
4135 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4136 {
4137   PetscFunctionBegin;
4138   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4139   PetscValidType(B, 1);
4140   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4141   PetscFunctionReturn(PETSC_SUCCESS);
4142 }
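
/*
  Usage sketch (illustrative only, not a PETSc example or test): the preallocation calls matching
  the 8x8 example in the manual page above. The variables m, n, d_nnz and o_nnz are hypothetical
  and hold each rank's own values.

    // rank 0: m=3, n=3, d_nnz={2,2,2}, o_nnz={2,2,2}
    // rank 1: m=3, n=3, d_nnz={3,3,2}, o_nnz={2,1,1}
    // rank 2: m=2, n=2, d_nnz={1,1},   o_nnz={4,4}
    Mat A;

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, m, n, 8, 8));
    PetscCall(MatSetType(A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz));
    // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() follow as usual
*/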
4143 
4144 /*@
4145   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4146   CSR format.
4147 
4148   Collective
4149 
4150   Input Parameters:
4151 + comm - MPI communicator
4152 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4153 . n    - This value should be the same as the local size used in creating the
4154          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4155          calculated if `N` is given) For square matrices n is almost always `m`.
4156 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4157 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4158 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4159 . j    - global column indices
4160 - a    - optional matrix values
4161 
4162   Output Parameter:
4163 . mat - the matrix
4164 
4165   Level: intermediate
4166 
4167   Notes:
4168   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4169   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4170   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4171 
4172   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4173 
4174   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4175 
4176   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4177   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4178 
4179   The format which is used for the sparse matrix input, is equivalent to a
4180   row-major ordering, i.e., for the following matrix, the input data expected is
4181   as shown
4182 .vb
4183         1 0 0
4184         2 0 3     P0
4185        -------
4186         4 5 6     P1
4187 
4188      Process0 [P0] rows_owned=[0,1]
4189         i =  {0,1,3}  [size = nrow+1  = 2+1]
4190         j =  {0,0,2}  [size = 3]
4191         v =  {1,2,3}  [size = 3]
4192 
4193      Process1 [P1] rows_owned=[2]
4194         i =  {0,3}    [size = nrow+1  = 1+1]
4195         j =  {0,1,2}  [size = 3]
4196         v =  {4,5,6}  [size = 3]
4197 .ve
4198 
4199 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4200           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4201 @*/
4202 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4203 {
4204   PetscFunctionBegin;
4205   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4206   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4207   PetscCall(MatCreate(comm, mat));
4208   PetscCall(MatSetSizes(*mat, m, n, M, N));
4209   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4210   PetscCall(MatSetType(*mat, MATMPIAIJ));
4211   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4212   PetscFunctionReturn(PETSC_SUCCESS);
4213 }
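
/*
  Usage sketch (illustrative only, not a PETSc example or test): building the 3x3 CSR example from
  the manual page above in a single call per rank; run on exactly two MPI ranks. The data arrays
  are the hypothetical per-rank CSR arrays shown in the manual page.

    Mat         A;
    PetscMPIInt rank;
    PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2};
    PetscScalar v0[] = {1.0, 2.0, 3.0};
    PetscInt    i1[] = {0, 3}, j1[] = {0, 1, 2};
    PetscScalar v1[] = {4.0, 5.0, 6.0};

    PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
    if (rank == 0) PetscCall(MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DETERMINE, 3, i0, j0, v0, &A));
    else PetscCall(MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 1, PETSC_DECIDE, PETSC_DETERMINE, 3, i1, j1, v1, &A));
    PetscCall(MatDestroy(&A));
*/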
4214 
4215 /*@
4216   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4217   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4218   to `MatCreateMPIAIJWithArrays()`
4219 
4220   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4221 
4222   Collective
4223 
4224   Input Parameters:
4225 + mat - the matrix
4226 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4227 . n   - This value should be the same as the local size used in creating the
4228        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4229        calculated if N is given) For square matrices n is almost always m.
4230 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4231 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4232 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4233 . J   - column indices
4234 - v   - matrix values
4235 
4236   Level: deprecated
4237 
4238 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4239           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4240 @*/
4241 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4242 {
4243   PetscInt        nnz, i;
4244   PetscBool       nooffprocentries;
4245   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4246   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4247   PetscScalar    *ad, *ao;
4248   PetscInt        ldi, Iii, md;
4249   const PetscInt *Adi = Ad->i;
4250   PetscInt       *ld  = Aij->ld;
4251 
4252   PetscFunctionBegin;
4253   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4254   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4255   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4256   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4257 
4258   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4259   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4260 
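  /* Each local row's values in v are laid out as: ld[i] off-diagonal entries whose global columns lie
     left of the diagonal block, then the md diagonal-block entries, then the remaining off-diagonal
     entries. Split them accordingly into the diagonal (ad) and off-diagonal (ao) value arrays. */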
4261   for (i = 0; i < m; i++) {
4262     if (PetscDefined(USE_DEBUG)) {
4263       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4264         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4265         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4266       }
4267     }
4268     nnz = Ii[i + 1] - Ii[i];
4269     Iii = Ii[i];
4270     ldi = ld[i];
4271     md  = Adi[i + 1] - Adi[i];
4272     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4273     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4274     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4275     ad += md;
4276     ao += nnz - md;
4277   }
4278   nooffprocentries      = mat->nooffprocentries;
4279   mat->nooffprocentries = PETSC_TRUE;
4280   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4281   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4282   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4283   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4284   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4285   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4286   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4287   mat->nooffprocentries = nooffprocentries;
4288   PetscFunctionReturn(PETSC_SUCCESS);
4289 }
4290 
4291 /*@
4292   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4293 
4294   Collective
4295 
4296   Input Parameters:
4297 + mat - the matrix
4298 - v   - matrix values, stored by row
4299 
4300   Level: intermediate
4301 
4302   Notes:
4303   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4304 
4305   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4306 
4307 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4308           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4309 @*/
4310 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4311 {
4312   PetscInt        nnz, i, m;
4313   PetscBool       nooffprocentries;
4314   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4315   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4316   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4317   PetscScalar    *ad, *ao;
4318   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4319   PetscInt        ldi, Iii, md;
4320   PetscInt       *ld = Aij->ld;
4321 
4322   PetscFunctionBegin;
4323   m = mat->rmap->n;
4324 
4325   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4326   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4327   Iii = 0;
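  /* v stores each local row contiguously: ld[i] off-diagonal entries left of the diagonal block,
     md diagonal-block entries, and the remaining off-diagonal entries; copy the middle piece into
     the diagonal part (ad) and the two outer pieces into the off-diagonal part (ao) */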
4328   for (i = 0; i < m; i++) {
4329     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4330     ldi = ld[i];
4331     md  = Adi[i + 1] - Adi[i];
4332     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4333     ad += md;
4334     if (ao) {
4335       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4336       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4337       ao += nnz - md;
4338     }
4339     Iii += nnz;
4340   }
4341   nooffprocentries      = mat->nooffprocentries;
4342   mat->nooffprocentries = PETSC_TRUE;
4343   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4344   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4345   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4346   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4347   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4348   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4349   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4350   mat->nooffprocentries = nooffprocentries;
4351   PetscFunctionReturn(PETSC_SUCCESS);
4352 }
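
/*
  Usage sketch (illustrative only, not a PETSc example or test): refreshing the numerical values of
  a matrix previously created with MatCreateMPIAIJWithArrays() (sorted column indices assumed). The
  array vnew is hypothetical and must contain this rank's nonzero values in the same row-by-row
  order as the original values array.

    PetscCall(MatUpdateMPIAIJWithArray(A, vnew));
*/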
4353 
4354 /*@
4355   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4356   (the default parallel PETSc format).  For good matrix assembly performance
4357   the user should preallocate the matrix storage by setting the parameters
4358   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4359 
4360   Collective
4361 
4362   Input Parameters:
4363 + comm  - MPI communicator
4364 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4365           This value should be the same as the local size used in creating the
4366           y vector for the matrix-vector product y = Ax.
4367 . n     - This value should be the same as the local size used in creating the
4368           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4369           calculated if N is given) For square matrices n is almost always m.
4370 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4371 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4372 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4373           (same value is used for all local rows)
4374 . d_nnz - array containing the number of nonzeros in the various rows of the
4375           DIAGONAL portion of the local submatrix (possibly different for each row)
4376           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4377           The size of this array is equal to the number of local rows, i.e 'm'.
4378 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4379           submatrix (same value is used for all local rows).
4380 - o_nnz - array containing the number of nonzeros in the various rows of the
4381           OFF-DIAGONAL portion of the local submatrix (possibly different for
4382           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4383           structure. The size of this array is equal to the number
4384           of local rows, i.e 'm'.
4385 
4386   Output Parameter:
4387 . A - the matrix
4388 
4389   Options Database Keys:
4390 + -mat_no_inode                     - Do not use inodes
4391 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4392 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4393                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4394                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4395 
4396   Level: intermediate
4397 
4398   Notes:
4399   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4400   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4401   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4402 
4403   If the *_nnz parameter is given then the *_nz parameter is ignored
4404 
4405   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4406   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4407   storage requirements for this matrix.
4408 
4409   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4410   processor then it must be used on all processors that share the object for
4411   that argument.
4412 
4413   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4414   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4415 
4416   The user MUST specify either the local or global matrix dimensions
4417   (possibly both).
4418 
4419   The parallel matrix is partitioned across processors such that the
4420   first `m0` rows belong to process 0, the next `m1` rows belong to
4421   process 1, the next `m2` rows belong to process 2, etc., where
4422   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4423   values corresponding to an [m x N] submatrix.
4424 
4425   The columns are logically partitioned with the first n0 columns belonging
4426   to the 0th partition, the next n1 columns belonging to the next
4427   partition, etc., where n0,n1,n2... are the input parameter 'n'.
4428 
4429   The DIAGONAL portion of the local submatrix on any given processor
4430   is the submatrix formed by the rows and columns m,n owned by
4431   the given processor, i.e., the diagonal matrix on
4432   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4433   etc. The remaining portion of the local submatrix [m x (N-n)]
4434   constitute the OFF-DIAGONAL portion. The example below better
4435   illustrates this concept. The two matrices, the DIAGONAL portion and
4436   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4437 
4438   For a square global matrix we define each processor's diagonal portion
4439   to be its local rows and the corresponding columns (a square submatrix);
4440   each processor's off-diagonal portion encompasses the remainder of the
4441   local matrix (a rectangular submatrix).
4442 
4443   If `o_nnz`, `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4444 
4445   When calling this routine with a single process communicator, a matrix of
4446   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4447   type of communicator, use the construction mechanism
4448 .vb
4449   MatCreate(..., &A);
4450   MatSetType(A, MATMPIAIJ);
4451   MatSetSizes(A, m, n, M, N);
4452   MatMPIAIJSetPreallocation(A, ...);
4453 .ve
4454 
4455   By default, this format uses inodes (identical nodes) when possible.
4456   We search for consecutive rows with the same nonzero structure, thereby
4457   reusing matrix information to achieve increased efficiency.
4458 
4459   Example Usage:
4460   Consider the following 8x8 matrix with 34 non-zero values, that is
4461   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4462   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4463   as follows
4464 
4465 .vb
4466             1  2  0  |  0  3  0  |  0  4
4467     Proc0   0  5  6  |  7  0  0  |  8  0
4468             9  0 10  | 11  0  0  | 12  0
4469     -------------------------------------
4470            13  0 14  | 15 16 17  |  0  0
4471     Proc1   0 18  0  | 19 20 21  |  0  0
4472             0  0  0  | 22 23  0  | 24  0
4473     -------------------------------------
4474     Proc2  25 26 27  |  0  0 28  | 29  0
4475            30  0  0  | 31 32 33  |  0 34
4476 .ve
4477 
4478   This can be represented as a collection of submatrices as
4479 
4480 .vb
4481       A B C
4482       D E F
4483       G H I
4484 .ve
4485 
4486   Where the submatrices A,B,C are owned by proc0, D,E,F are
4487   owned by proc1, G,H,I are owned by proc2.
4488 
4489   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4490   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4491   The 'M','N' parameters are 8,8, and have the same values on all procs.
4492 
4493   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4494   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4495   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4496   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4497   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4498   matrix, and [DF] as another `MATSEQAIJ` matrix.
4499 
4500   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4501   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4502   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4503   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4504   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4505   In this case, the values of `d_nz`,`o_nz` are
4506 .vb
4507      proc0  dnz = 2, o_nz = 2
4508      proc1  dnz = 3, o_nz = 2
4509      proc2  dnz = 1, o_nz = 4
4510 .ve
4511   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4512   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4513   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4514   34 values.
4515 
4516   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4517   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4518   In the above case the values for `d_nnz`, `o_nnz` are
4519 .vb
4520      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4521      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4522      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4523 .ve
4524   Here the space allocated is the sum of all the above values, i.e., 34, and
4525   hence the preallocation is perfect.
4526 
4527 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4528           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4529           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4530 @*/
4531 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4532 {
4533   PetscMPIInt size;
4534 
4535   PetscFunctionBegin;
4536   PetscCall(MatCreate(comm, A));
4537   PetscCall(MatSetSizes(*A, m, n, M, N));
4538   PetscCallMPI(MPI_Comm_size(comm, &size));
4539   if (size > 1) {
4540     PetscCall(MatSetType(*A, MATMPIAIJ));
4541     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4542   } else {
4543     PetscCall(MatSetType(*A, MATSEQAIJ));
4544     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4545   }
4546   PetscFunctionReturn(PETSC_SUCCESS);
4547 }
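
/*
  Usage sketch (illustrative only, not a PETSc example or test): a typical MatCreateAIJ() call
  followed by assembly. The quantities m, N, d_nnz and o_nnz are hypothetical per-rank
  preallocation data computed by the caller.

    Mat A;

    PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, m, PETSC_DECIDE, PETSC_DETERMINE, N, 0, d_nnz, 0, o_nnz, &A));
    // ... insert entries with MatSetValues() ...
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&A));
*/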
4548 
4549 /*@C
4550   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4551 
4552   Not Collective
4553 
4554   Input Parameter:
4555 . A - The `MATMPIAIJ` matrix
4556 
4557   Output Parameters:
4558 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4559 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4560 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4561 
4562   Level: intermediate
4563 
4564   Note:
4565   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4566   in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4567   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4568   local column numbers to global column numbers in the original matrix.
4569 
4570 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4571 @*/
4572 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4573 {
4574   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4575   PetscBool   flg;
4576 
4577   PetscFunctionBegin;
4578   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4579   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4580   if (Ad) *Ad = a->A;
4581   if (Ao) *Ao = a->B;
4582   if (colmap) *colmap = a->garray;
4583   PetscFunctionReturn(PETSC_SUCCESS);
4584 }
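
/*
  Usage sketch (illustrative only, not a PETSc example or test): inspecting the local blocks of a
  MATMPIAIJ matrix A. Ad is the square diagonal block, Ao the off-diagonal block with compressed
  local column numbering, and colmap[c] gives the global column corresponding to local column c of Ao.

    Mat             Ad, Ao;
    const PetscInt *colmap;
    PetscInt        nco;

    PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap));
    PetscCall(MatGetLocalSize(Ao, NULL, &nco));
    for (PetscInt c = 0; c < nco; c++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "local column %" PetscInt_FMT " -> global column %" PetscInt_FMT "\n", c, colmap[c]));
*/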
4585 
4586 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4587 {
4588   PetscInt     m, N, i, rstart, nnz, Ii;
4589   PetscInt    *indx;
4590   PetscScalar *values;
4591   MatType      rootType;
4592 
4593   PetscFunctionBegin;
4594   PetscCall(MatGetSize(inmat, &m, &N));
4595   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4596     PetscInt *dnz, *onz, sum, bs, cbs;
4597 
4598     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4599     /* Check sum(n) = N */
4600     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4601     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4602 
4603     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4604     rstart -= m;
4605 
4606     MatPreallocateBegin(comm, m, n, dnz, onz);
4607     for (i = 0; i < m; i++) {
4608       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4609       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4610       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4611     }
4612 
4613     PetscCall(MatCreate(comm, outmat));
4614     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4615     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4616     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4617     PetscCall(MatGetRootType_Private(inmat, &rootType));
4618     PetscCall(MatSetType(*outmat, rootType));
4619     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4620     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4621     MatPreallocateEnd(dnz, onz);
4622     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4623   }
4624 
4625   /* numeric phase */
4626   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4627   for (i = 0; i < m; i++) {
4628     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4629     Ii = i + rstart;
4630     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4631     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4632   }
4633   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4634   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4635   PetscFunctionReturn(PETSC_SUCCESS);
4636 }
4637 
4638 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
4639 {
4640   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;
4641 
4642   PetscFunctionBegin;
4643   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4644   PetscCall(PetscFree(merge->id_r));
4645   PetscCall(PetscFree(merge->len_s));
4646   PetscCall(PetscFree(merge->len_r));
4647   PetscCall(PetscFree(merge->bi));
4648   PetscCall(PetscFree(merge->bj));
4649   PetscCall(PetscFree(merge->buf_ri[0]));
4650   PetscCall(PetscFree(merge->buf_ri));
4651   PetscCall(PetscFree(merge->buf_rj[0]));
4652   PetscCall(PetscFree(merge->buf_rj));
4653   PetscCall(PetscFree(merge->coi));
4654   PetscCall(PetscFree(merge->coj));
4655   PetscCall(PetscFree(merge->owners_co));
4656   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4657   PetscCall(PetscFree(merge));
4658   PetscFunctionReturn(PETSC_SUCCESS);
4659 }
4660 
4661 #include <../src/mat/utils/freespace.h>
4662 #include <petscbt.h>
4663 
4664 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4665 {
4666   MPI_Comm             comm;
4667   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4668   PetscMPIInt          size, rank, taga, *len_s;
4669   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4670   PetscMPIInt          proc, k;
4671   PetscInt           **buf_ri, **buf_rj;
4672   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4673   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4674   MPI_Request         *s_waits, *r_waits;
4675   MPI_Status          *status;
4676   const MatScalar     *aa, *a_a;
4677   MatScalar          **abuf_r, *ba_i;
4678   Mat_Merge_SeqsToMPI *merge;
4679   PetscContainer       container;
4680 
4681   PetscFunctionBegin;
4682   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4683   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4684 
4685   PetscCallMPI(MPI_Comm_size(comm, &size));
4686   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4687 
4688   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4689   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4690   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4691   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4692   aa = a_a;
4693 
4694   bi     = merge->bi;
4695   bj     = merge->bj;
4696   buf_ri = merge->buf_ri;
4697   buf_rj = merge->buf_rj;
4698 
4699   PetscCall(PetscMalloc1(size, &status));
4700   owners = merge->rowmap->range;
4701   len_s  = merge->len_s;
4702 
4703   /* send and recv matrix values */
4704   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4705   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4706 
4707   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4708   for (proc = 0, k = 0; proc < size; proc++) {
4709     if (!len_s[proc]) continue;
4710     i = owners[proc];
4711     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4712     k++;
4713   }
4714 
4715   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4716   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4717   PetscCall(PetscFree(status));
4718 
4719   PetscCall(PetscFree(s_waits));
4720   PetscCall(PetscFree(r_waits));
4721 
4722   /* insert mat values of mpimat */
4723   PetscCall(PetscMalloc1(N, &ba_i));
4724   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4725 
4726   for (k = 0; k < merge->nrecv; k++) {
4727     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4728     nrows       = *buf_ri_k[k];
4729     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4730     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4731   }
4732 
4733   /* set values of ba */
4734   m = merge->rowmap->n;
4735   for (i = 0; i < m; i++) {
4736     arow = owners[rank] + i;
4737     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4738     bnzi = bi[i + 1] - bi[i];
4739     PetscCall(PetscArrayzero(ba_i, bnzi));
4740 
4741     /* add local non-zero vals of this proc's seqmat into ba */
4742     anzi   = ai[arow + 1] - ai[arow];
4743     aj     = a->j + ai[arow];
4744     aa     = a_a + ai[arow];
4745     nextaj = 0;
4746     for (j = 0; nextaj < anzi; j++) {
4747       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4748         ba_i[j] += aa[nextaj++];
4749       }
4750     }
4751 
4752     /* add received vals into ba */
4753     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4754       /* i-th row */
4755       if (i == *nextrow[k]) {
4756         anzi   = *(nextai[k] + 1) - *nextai[k];
4757         aj     = buf_rj[k] + *nextai[k];
4758         aa     = abuf_r[k] + *nextai[k];
4759         nextaj = 0;
4760         for (j = 0; nextaj < anzi; j++) {
4761           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4762             ba_i[j] += aa[nextaj++];
4763           }
4764         }
4765         nextrow[k]++;
4766         nextai[k]++;
4767       }
4768     }
4769     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4770   }
4771   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4772   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4773   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4774 
4775   PetscCall(PetscFree(abuf_r[0]));
4776   PetscCall(PetscFree(abuf_r));
4777   PetscCall(PetscFree(ba_i));
4778   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4779   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4780   PetscFunctionReturn(PETSC_SUCCESS);
4781 }
4782 
4783 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4784 {
4785   Mat                  B_mpi;
4786   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4787   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4788   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4789   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4790   PetscInt             len, *dnz, *onz, bs, cbs;
4791   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4792   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4793   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4794   MPI_Status          *status;
4795   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4796   PetscBT              lnkbt;
4797   Mat_Merge_SeqsToMPI *merge;
4798   PetscContainer       container;
4799 
4800   PetscFunctionBegin;
4801   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4802 
4803   /* make sure it is a PETSc comm */
4804   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4805   PetscCallMPI(MPI_Comm_size(comm, &size));
4806   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4807 
4808   PetscCall(PetscNew(&merge));
4809   PetscCall(PetscMalloc1(size, &status));
4810 
4811   /* determine row ownership */
4812   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4813   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4814   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4815   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4816   PetscCall(PetscLayoutSetUp(merge->rowmap));
4817   PetscCall(PetscMalloc1(size, &len_si));
4818   PetscCall(PetscMalloc1(size, &merge->len_s));
4819 
4820   m      = merge->rowmap->n;
4821   owners = merge->rowmap->range;
4822 
4823   /* determine the number of messages to send, their lengths */
4824   len_s = merge->len_s;
4825 
4826   len          = 0; /* length of buf_si[] */
4827   merge->nsend = 0;
4828   for (PetscMPIInt proc = 0; proc < size; proc++) {
4829     len_si[proc] = 0;
4830     if (proc == rank) {
4831       len_s[proc] = 0;
4832     } else {
4833       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4834       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4835     }
4836     if (len_s[proc]) {
4837       merge->nsend++;
4838       nrows = 0;
4839       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4840         if (ai[i + 1] > ai[i]) nrows++;
4841       }
4842       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4843       len += len_si[proc];
4844     }
4845   }
4846 
4847   /* determine the number and length of messages to receive for ij-structure */
4848   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4849   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4850 
4851   /* post the Irecv of j-structure */
4852   PetscCall(PetscCommGetNewTag(comm, &tagj));
4853   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4854 
4855   /* post the Isend of j-structure */
4856   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4857 
4858   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4859     if (!len_s[proc]) continue;
4860     i = owners[proc];
4861     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4862     k++;
4863   }
4864 
4865   /* receives and sends of j-structure are complete */
4866   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4867   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4868 
4869   /* send and recv i-structure */
4870   PetscCall(PetscCommGetNewTag(comm, &tagi));
4871   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4872 
4873   PetscCall(PetscMalloc1(len + 1, &buf_s));
4874   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4875   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4876     if (!len_s[proc]) continue;
4877     /* form outgoing message for i-structure:
4878          buf_si[0]:                 nrows to be sent
4879                [1:nrows]:           row index (global)
4880                [nrows+1:2*nrows+1]: i-structure index
4881     */
4882     nrows       = len_si[proc] / 2 - 1;
4883     buf_si_i    = buf_si + nrows + 1;
4884     buf_si[0]   = nrows;
4885     buf_si_i[0] = 0;
4886     nrows       = 0;
4887     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4888       anzi = ai[i + 1] - ai[i];
4889       if (anzi) {
4890         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4891         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4892         nrows++;
4893       }
4894     }
4895     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4896     k++;
4897     buf_si += len_si[proc];
4898   }
4899 
4900   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4901   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4902 
4903   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4904   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4905 
4906   PetscCall(PetscFree(len_si));
4907   PetscCall(PetscFree(len_ri));
4908   PetscCall(PetscFree(rj_waits));
4909   PetscCall(PetscFree2(si_waits, sj_waits));
4910   PetscCall(PetscFree(ri_waits));
4911   PetscCall(PetscFree(buf_s));
4912   PetscCall(PetscFree(status));
4913 
4914   /* compute a local seq matrix in each processor */
4915   /* allocate bi array and free space for accumulating nonzero column info */
4916   PetscCall(PetscMalloc1(m + 1, &bi));
4917   bi[0] = 0;
4918 
4919   /* create and initialize a linked list */
4920   nlnk = N + 1;
4921   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4922 
4923   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4924   len = ai[owners[rank + 1]] - ai[owners[rank]];
4925   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4926 
4927   current_space = free_space;
4928 
4929   /* determine symbolic info for each local row */
4930   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4931 
4932   for (k = 0; k < merge->nrecv; k++) {
4933     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4934     nrows       = *buf_ri_k[k];
4935     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4936     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4937   }
4938 
4939   MatPreallocateBegin(comm, m, n, dnz, onz);
4940   len = 0;
4941   for (i = 0; i < m; i++) {
4942     bnzi = 0;
4943     /* add local non-zero cols of this proc's seqmat into lnk */
4944     arow = owners[rank] + i;
4945     anzi = ai[arow + 1] - ai[arow];
4946     aj   = a->j + ai[arow];
4947     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4948     bnzi += nlnk;
4949     /* add received col data into lnk */
4950     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4951       if (i == *nextrow[k]) {            /* i-th row */
4952         anzi = *(nextai[k] + 1) - *nextai[k];
4953         aj   = buf_rj[k] + *nextai[k];
4954         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4955         bnzi += nlnk;
4956         nextrow[k]++;
4957         nextai[k]++;
4958       }
4959     }
4960     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4961 
4962     /* if free space is not available, make more free space */
4963     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
4964     /* copy data into free space, then initialize lnk */
4965     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
4966     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
4967 
4968     current_space->array += bnzi;
4969     current_space->local_used += bnzi;
4970     current_space->local_remaining -= bnzi;
4971 
4972     bi[i + 1] = bi[i] + bnzi;
4973   }
4974 
4975   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4976 
4977   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
4978   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
4979   PetscCall(PetscLLDestroy(lnk, lnkbt));
4980 
4981   /* create symbolic parallel matrix B_mpi */
4982   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
4983   PetscCall(MatCreate(comm, &B_mpi));
4984   if (n == PETSC_DECIDE) {
4985     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
4986   } else {
4987     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4988   }
4989   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
4990   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
4991   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
4992   MatPreallocateEnd(dnz, onz);
4993   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
4994 
4995   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4996   B_mpi->assembled = PETSC_FALSE;
4997   merge->bi        = bi;
4998   merge->bj        = bj;
4999   merge->buf_ri    = buf_ri;
5000   merge->buf_rj    = buf_rj;
5001   merge->coi       = NULL;
5002   merge->coj       = NULL;
5003   merge->owners_co = NULL;
5004 
5005   PetscCall(PetscCommDestroy(&comm));
5006 
5007   /* attach the supporting struct to B_mpi for reuse */
5008   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5009   PetscCall(PetscContainerSetPointer(container, merge));
5010   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5011   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5012   PetscCall(PetscContainerDestroy(&container));
5013   *mpimat = B_mpi;
5014 
5015   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5016   PetscFunctionReturn(PETSC_SUCCESS);
5017 }
5018 
5019 /*@
5020   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5021   matrices from each processor
5022 
5023   Collective
5024 
5025   Input Parameters:
5026 + comm   - the communicator the parallel matrix will live on
5027 . seqmat - the input sequential matrix on each MPI process
5028 . m      - number of local rows (or `PETSC_DECIDE`)
5029 . n      - number of local columns (or `PETSC_DECIDE`)
5030 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5031 
5032   Output Parameter:
5033 . mpimat - the parallel matrix generated
5034 
5035   Level: advanced
5036 
5037   Note:
5038   The dimensions of the sequential matrix on each MPI process MUST be the same.
5039   The input `seqmat` is retained in the container "Mat_Merge_SeqsToMPI" and will be
5040   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
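
  Example Usage:
  A minimal sketch, assuming each MPI process already holds an assembled sequential matrix `seqmat` of identical dimensions.
.vb
  Mat mpimat;

  PetscCall(MatCreateMPIAIJSumSeqAIJ(comm, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpimat));
  /* ... later, after the numerical values of seqmat change (same nonzero pattern) ... */
  PetscCall(MatCreateMPIAIJSumSeqAIJ(comm, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &mpimat));
  PetscCall(MatDestroy(&mpimat));
.ve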
5041 
5042 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5043 @*/
5044 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5045 {
5046   PetscMPIInt size;
5047 
5048   PetscFunctionBegin;
5049   PetscCallMPI(MPI_Comm_size(comm, &size));
5050   if (size == 1) {
5051     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5052     if (scall == MAT_INITIAL_MATRIX) {
5053       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5054     } else {
5055       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5056     }
5057     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5058     PetscFunctionReturn(PETSC_SUCCESS);
5059   }
5060   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5061   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5062   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5063   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5064   PetscFunctionReturn(PETSC_SUCCESS);
5065 }
5066 
5067 /*@
5068   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5069 
5070   Not Collective
5071 
5072   Input Parameter:
5073 . A - the matrix
5074 
5075   Output Parameter:
5076 . A_loc - the local sequential matrix generated
5077 
5078   Level: developer
5079 
5080   Notes:
5081   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5082   with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
5083   `n` is the global column count obtained with `MatGetSize()`.
5084 
5085   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5086 
5087   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5088 
5089   Destroy the matrix with `MatDestroy()`
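
  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATAIJ` matrix.
.vb
  Mat A_loc;

  PetscCall(MatAIJGetLocalMat(A, &A_loc));
  /* ... use the local rows of A through the sequential matrix A_loc ... */
  PetscCall(MatDestroy(&A_loc));
.ve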
5090 
5091 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5092 @*/
5093 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5094 {
5095   PetscBool mpi;
5096 
5097   PetscFunctionBegin;
5098   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5099   if (mpi) {
5100     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5101   } else {
5102     *A_loc = A;
5103     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5104   }
5105   PetscFunctionReturn(PETSC_SUCCESS);
5106 }
5107 
5108 /*@
5109   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5110 
5111   Not Collective
5112 
5113   Input Parameters:
5114 + A     - the matrix
5115 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5116 
5117   Output Parameter:
5118 . A_loc - the local sequential matrix generated
5119 
5120   Level: developer
5121 
5122   Notes:
5123   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5124   matrix with `mlocal` rows and `n` columns. `mlocal` is the local row count obtained with
5125   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5126 
5127   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5128 
5129   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5130   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5131   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5132   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
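
  Example Usage:
  A minimal sketch, assuming `A` is an assembled parallel `MATMPIAIJ` matrix.
.vb
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
  /* ... later, after the numerical values of A change (same nonzero pattern), refresh A_loc ... */
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));
  PetscCall(MatDestroy(&A_loc));
.ve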
5133 
5134 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5135 @*/
5136 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5137 {
5138   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5139   Mat_SeqAIJ        *mat, *a, *b;
5140   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5141   const PetscScalar *aa, *ba, *aav, *bav;
5142   PetscScalar       *ca, *cam;
5143   PetscMPIInt        size;
5144   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5145   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5146   PetscBool          match;
5147 
5148   PetscFunctionBegin;
5149   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5150   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5151   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5152   if (size == 1) {
5153     if (scall == MAT_INITIAL_MATRIX) {
5154       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5155       *A_loc = mpimat->A;
5156     } else if (scall == MAT_REUSE_MATRIX) {
5157       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5158     }
5159     PetscFunctionReturn(PETSC_SUCCESS);
5160   }
5161 
5162   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5163   a  = (Mat_SeqAIJ *)mpimat->A->data;
5164   b  = (Mat_SeqAIJ *)mpimat->B->data;
5165   ai = a->i;
5166   aj = a->j;
5167   bi = b->i;
5168   bj = b->j;
5169   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5170   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5171   aa = aav;
5172   ba = bav;
5173   if (scall == MAT_INITIAL_MATRIX) {
5174     PetscCall(PetscMalloc1(1 + am, &ci));
5175     ci[0] = 0;
5176     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5177     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5178     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5179     k = 0;
5180     for (i = 0; i < am; i++) {
5181       ncols_o = bi[i + 1] - bi[i];
5182       ncols_d = ai[i + 1] - ai[i];
5183       /* off-diagonal portion of A */
5184       for (jo = 0; jo < ncols_o; jo++) {
5185         col = cmap[*bj];
5186         if (col >= cstart) break;
5187         cj[k] = col;
5188         bj++;
5189         ca[k++] = *ba++;
5190       }
5191       /* diagonal portion of A */
5192       for (j = 0; j < ncols_d; j++) {
5193         cj[k]   = cstart + *aj++;
5194         ca[k++] = *aa++;
5195       }
5196       /* off-diagonal portion of A */
5197       for (j = jo; j < ncols_o; j++) {
5198         cj[k]   = cmap[*bj++];
5199         ca[k++] = *ba++;
5200       }
5201     }
5202     /* put together the new matrix */
5203     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5204     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5205     /* Since these are PETSc arrays, change flags to free them as necessary. */
5206     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5207     mat->free_a  = PETSC_TRUE;
5208     mat->free_ij = PETSC_TRUE;
5209     mat->nonew   = 0;
5210   } else if (scall == MAT_REUSE_MATRIX) {
5211     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5212     ci  = mat->i;
5213     cj  = mat->j;
5214     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5215     for (i = 0; i < am; i++) {
5216       /* off-diagonal portion of A */
5217       ncols_o = bi[i + 1] - bi[i];
5218       for (jo = 0; jo < ncols_o; jo++) {
5219         col = cmap[*bj];
5220         if (col >= cstart) break;
5221         *cam++ = *ba++;
5222         bj++;
5223       }
5224       /* diagonal portion of A */
5225       ncols_d = ai[i + 1] - ai[i];
5226       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5227       /* off-diagonal portion of A */
5228       for (j = jo; j < ncols_o; j++) {
5229         *cam++ = *ba++;
5230         bj++;
5231       }
5232     }
5233     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5234   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5235   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5236   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5237   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5238   PetscFunctionReturn(PETSC_SUCCESS);
5239 }
5240 
5241 /*@
5242   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5243   `mlocal` rows and `n` columns, where `n` is the sum of the number of columns of the diagonal and off-diagonal parts
5244 
5245   Not Collective
5246 
5247   Input Parameters:
5248 + A     - the matrix
5249 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5250 
5251   Output Parameters:
5252 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5253 - A_loc - the local sequential matrix generated
5254 
5255   Level: developer
5256 
5257   Note:
5258   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5259   part, followed by those associated with the off-diagonal part (in its local ordering)
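
  Example Usage:
  A minimal sketch, assuming `A` is an assembled parallel `MATMPIAIJ` matrix.
.vb
  Mat A_loc;
  IS  glob;

  PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc));
  /* local column c of A_loc corresponds to global column glob[c] of A */
  PetscCall(ISDestroy(&glob));
  PetscCall(MatDestroy(&A_loc));
.ve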
5260 
5261 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5262 @*/
5263 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5264 {
5265   Mat             Ao, Ad;
5266   const PetscInt *cmap;
5267   PetscMPIInt     size;
5268   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5269 
5270   PetscFunctionBegin;
5271   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5272   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5273   if (size == 1) {
5274     if (scall == MAT_INITIAL_MATRIX) {
5275       PetscCall(PetscObjectReference((PetscObject)Ad));
5276       *A_loc = Ad;
5277     } else if (scall == MAT_REUSE_MATRIX) {
5278       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5279     }
5280     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5281     PetscFunctionReturn(PETSC_SUCCESS);
5282   }
5283   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5284   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5285   if (f) {
5286     PetscCall((*f)(A, scall, glob, A_loc));
5287   } else {
5288     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5289     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5290     Mat_SeqAIJ        *c;
5291     PetscInt          *ai = a->i, *aj = a->j;
5292     PetscInt          *bi = b->i, *bj = b->j;
5293     PetscInt          *ci, *cj;
5294     const PetscScalar *aa, *ba;
5295     PetscScalar       *ca;
5296     PetscInt           i, j, am, dn, on;
5297 
5298     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5299     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5300     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5301     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5302     if (scall == MAT_INITIAL_MATRIX) {
5303       PetscInt k;
5304       PetscCall(PetscMalloc1(1 + am, &ci));
5305       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5306       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5307       ci[0] = 0;
5308       for (i = 0, k = 0; i < am; i++) {
5309         const PetscInt ncols_o = bi[i + 1] - bi[i];
5310         const PetscInt ncols_d = ai[i + 1] - ai[i];
5311         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5312         /* diagonal portion of A */
5313         for (j = 0; j < ncols_d; j++, k++) {
5314           cj[k] = *aj++;
5315           ca[k] = *aa++;
5316         }
5317         /* off-diagonal portion of A */
5318         for (j = 0; j < ncols_o; j++, k++) {
5319           cj[k] = dn + *bj++;
5320           ca[k] = *ba++;
5321         }
5322       }
5323       /* put together the new matrix */
5324       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5325       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5326       /* Since these are PETSc arrays, change flags to free them as necessary. */
5327       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5328       c->free_a  = PETSC_TRUE;
5329       c->free_ij = PETSC_TRUE;
5330       c->nonew   = 0;
5331       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5332     } else if (scall == MAT_REUSE_MATRIX) {
5333       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5334       for (i = 0; i < am; i++) {
5335         const PetscInt ncols_d = ai[i + 1] - ai[i];
5336         const PetscInt ncols_o = bi[i + 1] - bi[i];
5337         /* diagonal portion of A */
5338         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5339         /* off-diagonal portion of A */
5340         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5341       }
5342       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5343     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5344     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5345     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5346     if (glob) {
5347       PetscInt cst, *gidx;
5348 
5349       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5350       PetscCall(PetscMalloc1(dn + on, &gidx));
5351       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5352       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5353       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5354     }
5355   }
5356   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5357   PetscFunctionReturn(PETSC_SUCCESS);
5358 }
5359 
5360 /*@C
5361   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5362 
5363   Not Collective
5364 
5365   Input Parameters:
5366 + A     - the matrix
5367 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5368 . row   - index set of rows to extract (or `NULL`)
5369 - col   - index set of columns to extract (or `NULL`)
5370 
5371   Output Parameter:
5372 . A_loc - the local sequential matrix generated
5373 
5374   Level: developer
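
  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ` matrix; passing `NULL` for `row` and `col` selects all local rows and only the nonzero columns.
.vb
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc));
  /* ... use A_loc ... */
  PetscCall(MatDestroy(&A_loc));
.ve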
5375 
5376 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5377 @*/
5378 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5379 {
5380   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5381   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5382   IS          isrowa, iscola;
5383   Mat        *aloc;
5384   PetscBool   match;
5385 
5386   PetscFunctionBegin;
5387   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5388   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5389   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5390   if (!row) {
5391     start = A->rmap->rstart;
5392     end   = A->rmap->rend;
5393     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5394   } else {
5395     isrowa = *row;
5396   }
5397   if (!col) {
5398     start = A->cmap->rstart;
5399     cmap  = a->garray;
5400     nzA   = a->A->cmap->n;
5401     nzB   = a->B->cmap->n;
5402     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5403     ncols = 0;
5404     for (i = 0; i < nzB; i++) {
5405       if (cmap[i] < start) idx[ncols++] = cmap[i];
5406       else break;
5407     }
5408     imark = i;
5409     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5410     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5411     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5412   } else {
5413     iscola = *col;
5414   }
5415   if (scall != MAT_INITIAL_MATRIX) {
5416     PetscCall(PetscMalloc1(1, &aloc));
5417     aloc[0] = *A_loc;
5418   }
5419   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5420   if (!col) { /* attach global id of condensed columns */
5421     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5422   }
5423   *A_loc = aloc[0];
5424   PetscCall(PetscFree(aloc));
5425   if (!row) PetscCall(ISDestroy(&isrowa));
5426   if (!col) PetscCall(ISDestroy(&iscola));
5427   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5428   PetscFunctionReturn(PETSC_SUCCESS);
5429 }
5430 
5431 /*
5432  * Create a sequential AIJ matrix based on row indices; all the columns of a row are extracted once that row is matched.
5433  * Rows may be local or remote. The routine is designed to be memory scalable, so that nothing is based
5434  * on a global size.
5435  * */
5436 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5437 {
5438   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5439   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5440   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5441   PetscMPIInt            owner;
5442   PetscSFNode           *iremote, *oiremote;
5443   const PetscInt        *lrowindices;
5444   PetscSF                sf, osf;
5445   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5446   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5447   MPI_Comm               comm;
5448   ISLocalToGlobalMapping mapping;
5449   const PetscScalar     *pd_a, *po_a;
5450 
5451   PetscFunctionBegin;
5452   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5453   /* plocalsize is the number of roots
5454    * nrows is the number of leaves
5455    * */
5456   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5457   PetscCall(ISGetLocalSize(rows, &nrows));
5458   PetscCall(PetscCalloc1(nrows, &iremote));
5459   PetscCall(ISGetIndices(rows, &lrowindices));
5460   for (i = 0; i < nrows; i++) {
5461     /* Find a remote index and an owner for a row
5462      * The row could be local or remote
5463      * */
5464     owner = 0;
5465     lidx  = 0;
5466     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5467     iremote[i].index = lidx;
5468     iremote[i].rank  = owner;
5469   }
5470   /* Create SF to communicate how many nonzero columns for each row */
5471   PetscCall(PetscSFCreate(comm, &sf));
5472   /* SF will figure out the number of nonzero columns for each row, and their
5473    * offsets
5474    * */
5475   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5476   PetscCall(PetscSFSetFromOptions(sf));
5477   PetscCall(PetscSFSetUp(sf));
5478 
5479   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5480   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5481   PetscCall(PetscCalloc1(nrows, &pnnz));
5482   roffsets[0] = 0;
5483   roffsets[1] = 0;
5484   for (i = 0; i < plocalsize; i++) {
5485     /* diagonal */
5486     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5487     /* off-diagonal */
5488     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5489     /* compute offsets so that we know the relative location of each row's entries */
5490     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5491     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5492   }
5493   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5494   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5495   /* 'r' means root, and 'l' means leaf */
5496   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5497   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5498   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5499   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5500   PetscCall(PetscSFDestroy(&sf));
5501   PetscCall(PetscFree(roffsets));
5502   PetscCall(PetscFree(nrcols));
5503   dntotalcols = 0;
5504   ontotalcols = 0;
5505   ncol        = 0;
5506   for (i = 0; i < nrows; i++) {
5507     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5508     ncol    = PetscMax(pnnz[i], ncol);
5509     /* diagonal */
5510     dntotalcols += nlcols[i * 2 + 0];
5511     /* off-diagonal */
5512     ontotalcols += nlcols[i * 2 + 1];
5513   }
5514   /* We do not need to figure out the right number of columns
5515    * since all the calculations will be done by going through the raw data
5516    * */
5517   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5518   PetscCall(MatSetUp(*P_oth));
5519   PetscCall(PetscFree(pnnz));
5520   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5521   /* diagonal */
5522   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5523   /* off-diagonal */
5524   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5525   /* diagonal */
5526   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5527   /* off-diagonal */
5528   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5529   dntotalcols = 0;
5530   ontotalcols = 0;
5531   ntotalcols  = 0;
5532   for (i = 0; i < nrows; i++) {
5533     owner = 0;
5534     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5535     /* Set iremote for diag matrix */
5536     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5537       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5538       iremote[dntotalcols].rank  = owner;
5539       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5540       ilocal[dntotalcols++] = ntotalcols++;
5541     }
5542     /* off-diagonal */
5543     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5544       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5545       oiremote[ontotalcols].rank  = owner;
5546       oilocal[ontotalcols++]      = ntotalcols++;
5547     }
5548   }
5549   PetscCall(ISRestoreIndices(rows, &lrowindices));
5550   PetscCall(PetscFree(loffsets));
5551   PetscCall(PetscFree(nlcols));
5552   PetscCall(PetscSFCreate(comm, &sf));
5553   /* P serves as roots and P_oth is leaves
5554    * Diag matrix
5555    * */
5556   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5557   PetscCall(PetscSFSetFromOptions(sf));
5558   PetscCall(PetscSFSetUp(sf));
5559 
5560   PetscCall(PetscSFCreate(comm, &osf));
5561   /* off-diagonal */
5562   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5563   PetscCall(PetscSFSetFromOptions(osf));
5564   PetscCall(PetscSFSetUp(osf));
5565   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5566   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5567   /* operate on the matrix internal data to save memory */
5568   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5569   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5570   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5571   /* Convert to global indices for diag matrix */
5572   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5573   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5574   /* We want P_oth to store global indices */
5575   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5576   /* Use memory scalable approach */
5577   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5578   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5579   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5580   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5581   /* Convert back to local indices */
5582   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5583   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5584   nout = 0;
5585   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5586   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5587   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5588   /* Exchange values */
5589   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5590   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5591   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5592   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5593   /* Stop PETSc from shrinking memory */
5594   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5595   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5596   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5597   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5598   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5599   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5600   PetscCall(PetscSFDestroy(&sf));
5601   PetscCall(PetscSFDestroy(&osf));
5602   PetscFunctionReturn(PETSC_SUCCESS);
5603 }
5604 
5605 /*
5606  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5607  * This supports MPIAIJ and MAIJ
5608  * */
5609 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5610 {
5611   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5612   Mat_SeqAIJ *p_oth;
5613   IS          rows, map;
5614   PetscHMapI  hamp;
5615   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5616   MPI_Comm    comm;
5617   PetscSF     sf, osf;
5618   PetscBool   has;
5619 
5620   PetscFunctionBegin;
5621   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5622   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5623   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5624    *  and then create a submatrix (that often is an overlapping matrix)
5625    * */
5626   if (reuse == MAT_INITIAL_MATRIX) {
5627     /* Use a hash table to figure out unique keys */
5628     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5629     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5630     count = 0;
5631     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5632     for (i = 0; i < a->B->cmap->n; i++) {
5633       key = a->garray[i] / dof;
5634       PetscCall(PetscHMapIHas(hamp, key, &has));
5635       if (!has) {
5636         mapping[i] = count;
5637         PetscCall(PetscHMapISet(hamp, key, count++));
5638       } else {
5639         /* Current 'i' has the same value as in the previous step */
5640         mapping[i] = count - 1;
5641       }
5642     }
5643     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5644     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5645     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5646     PetscCall(PetscCalloc1(htsize, &rowindices));
5647     off = 0;
5648     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5649     PetscCall(PetscHMapIDestroy(&hamp));
5650     PetscCall(PetscSortInt(htsize, rowindices));
5651     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5652     /* In case the matrix was already created but the user wants to recreate it */
5653     PetscCall(MatDestroy(P_oth));
5654     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5655     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5656     PetscCall(ISDestroy(&map));
5657     PetscCall(ISDestroy(&rows));
5658   } else if (reuse == MAT_REUSE_MATRIX) {
5659     /* If the matrix was already created, we simply update its values using the SF objects
5660      * that were attached to the matrix earlier.
5661      */
5662     const PetscScalar *pd_a, *po_a;
5663 
5664     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5665     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5666     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5667     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5668     /* Update values in place */
5669     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5670     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5671     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5672     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5673     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5674     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5675     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5676     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5677   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5678   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5679   PetscFunctionReturn(PETSC_SUCCESS);
5680 }
5681 
5682 /*@C
5683   MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that correspond to the nonzero columns of local `A`
5684 
5685   Collective
5686 
5687   Input Parameters:
5688 + A     - the first matrix in `MATMPIAIJ` format
5689 . B     - the second matrix in `MATMPIAIJ` format
5690 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5691 
5692   Output Parameters:
5693 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5694 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5695 - B_seq - the sequential matrix generated
5696 
5697   Level: developer
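
  Example Usage:
  A minimal sketch, assuming `A` and `B` are assembled `MATMPIAIJ` matrices with compatible layouts; releasing the outputs with `ISDestroy()` and `MatDestroy()` is assumed to be the caller's responsibility.
.vb
  IS  rowb = NULL, colb = NULL;
  Mat B_seq;

  PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq));
  /* ... later, after the numerical values of B change (same nonzero pattern) ... */
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq));
  PetscCall(ISDestroy(&rowb));
  PetscCall(ISDestroy(&colb));
  PetscCall(MatDestroy(&B_seq));
.ve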
5698 
5699 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5700 @*/
5701 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5702 {
5703   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5704   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5705   IS          isrowb, iscolb;
5706   Mat        *bseq = NULL;
5707 
5708   PetscFunctionBegin;
5709   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5710              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5711   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5712 
5713   if (scall == MAT_INITIAL_MATRIX) {
5714     start = A->cmap->rstart;
5715     cmap  = a->garray;
5716     nzA   = a->A->cmap->n;
5717     nzB   = a->B->cmap->n;
5718     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5719     ncols = 0;
5720     for (i = 0; i < nzB; i++) { /* row < local row index */
5721       if (cmap[i] < start) idx[ncols++] = cmap[i];
5722       else break;
5723     }
5724     imark = i;
5725     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5726     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5727     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5728     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5729   } else {
5730     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5731     isrowb = *rowb;
5732     iscolb = *colb;
5733     PetscCall(PetscMalloc1(1, &bseq));
5734     bseq[0] = *B_seq;
5735   }
5736   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5737   *B_seq = bseq[0];
5738   PetscCall(PetscFree(bseq));
5739   if (!rowb) {
5740     PetscCall(ISDestroy(&isrowb));
5741   } else {
5742     *rowb = isrowb;
5743   }
5744   if (!colb) {
5745     PetscCall(ISDestroy(&iscolb));
5746   } else {
5747     *colb = iscolb;
5748   }
5749   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5750   PetscFunctionReturn(PETSC_SUCCESS);
5751 }
5752 
5753 /*
5754     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
5755     of the OFF-DIAGONAL portion of local A
5756 
5757     Collective
5758 
5759    Input Parameters:
5760 +    A,B - the matrices in `MATMPIAIJ` format
5761 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5762 
5763    Output Parameters:
5764 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5765 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5766 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5767 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5768 
5769     Developer Note:
5770     This directly accesses information inside the VecScatter associated with the matrix-vector product
5771      for this matrix. This is not desirable.
5772 
5773     Level: developer
5774 
5775 */
5776 
5777 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5778 {
5779   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5780   VecScatter         ctx;
5781   MPI_Comm           comm;
5782   const PetscMPIInt *rprocs, *sprocs;
5783   PetscMPIInt        nrecvs, nsends;
5784   const PetscInt    *srow, *rstarts, *sstarts;
5785   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5786   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5787   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5788   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5789   PetscMPIInt        size, tag, rank, nreqs;
5790 
5791   PetscFunctionBegin;
5792   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5793   PetscCallMPI(MPI_Comm_size(comm, &size));
5794 
5795   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5796              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5797   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5798   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5799 
5800   if (size == 1) {
5801     startsj_s = NULL;
5802     bufa_ptr  = NULL;
5803     *B_oth    = NULL;
5804     PetscFunctionReturn(PETSC_SUCCESS);
5805   }
5806 
5807   ctx = a->Mvctx;
5808   tag = ((PetscObject)ctx)->tag;
5809 
5810   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5811   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5812   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5813   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5814   PetscCall(PetscMalloc1(nreqs, &reqs));
5815   rwaits = reqs;
5816   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5817 
5818   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5819   if (scall == MAT_INITIAL_MATRIX) {
5820     /* i-array */
5821     /*  post receives */
5822     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5823     for (i = 0; i < nrecvs; i++) {
5824       rowlen = rvalues + rstarts[i] * rbs;
5825       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5826       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5827     }
5828 
5829     /* pack the outgoing message */
5830     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5831 
5832     sstartsj[0] = 0;
5833     rstartsj[0] = 0;
5834     len         = 0; /* total length of j or a array to be sent */
5835     if (nsends) {
5836       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5837       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5838     }
5839     for (i = 0; i < nsends; i++) {
5840       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5841       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5842       for (j = 0; j < nrows; j++) {
5843         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5844         for (l = 0; l < sbs; l++) {
5845           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5846 
5847           rowlen[j * sbs + l] = ncols;
5848 
5849           len += ncols;
5850           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5851         }
5852         k++;
5853       }
5854       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5855 
5856       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5857     }
5858     /* recvs and sends of i-array are completed */
5859     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5860     PetscCall(PetscFree(svalues));
5861 
5862     /* allocate buffers for sending j and a arrays */
5863     PetscCall(PetscMalloc1(len + 1, &bufj));
5864     PetscCall(PetscMalloc1(len + 1, &bufa));
5865 
5866     /* create i-array of B_oth */
5867     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5868 
5869     b_othi[0] = 0;
5870     len       = 0; /* total length of j or a array to be received */
5871     k         = 0;
5872     for (i = 0; i < nrecvs; i++) {
5873       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5874       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5875       for (j = 0; j < nrows; j++) {
5876         b_othi[k + 1] = b_othi[k] + rowlen[j];
5877         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5878         k++;
5879       }
5880       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5881     }
5882     PetscCall(PetscFree(rvalues));
5883 
5884     /* allocate space for j and a arrays of B_oth */
5885     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5886     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5887 
5888     /* j-array */
5889     /*  post receives of j-array */
5890     for (i = 0; i < nrecvs; i++) {
5891       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5892       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5893     }
5894 
5895     /* pack the outgoing message j-array */
5896     if (nsends) k = sstarts[0];
5897     for (i = 0; i < nsends; i++) {
5898       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5899       bufJ  = bufj + sstartsj[i];
5900       for (j = 0; j < nrows; j++) {
5901         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5902         for (ll = 0; ll < sbs; ll++) {
5903           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5904           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5905           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5906         }
5907       }
5908       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5909     }
5910 
5911     /* recvs and sends of j-array are completed */
5912     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5913   } else if (scall == MAT_REUSE_MATRIX) {
5914     sstartsj = *startsj_s;
5915     rstartsj = *startsj_r;
5916     bufa     = *bufa_ptr;
5917     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5918   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5919 
5920   /* a-array */
5921   /*  post receives of a-array */
5922   for (i = 0; i < nrecvs; i++) {
5923     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5924     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5925   }
5926 
5927   /* pack the outgoing message a-array */
5928   if (nsends) k = sstarts[0];
5929   for (i = 0; i < nsends; i++) {
5930     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5931     bufA  = bufa + sstartsj[i];
5932     for (j = 0; j < nrows; j++) {
5933       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5934       for (ll = 0; ll < sbs; ll++) {
5935         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5936         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5937         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5938       }
5939     }
5940     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5941   }
5942   /* recvs and sends of a-array are completed */
5943   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5944   PetscCall(PetscFree(reqs));
5945 
5946   if (scall == MAT_INITIAL_MATRIX) {
5947     Mat_SeqAIJ *b_oth;
5948 
5949     /* put together the new matrix */
5950     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5951 
5952     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5953     /* Since these are PETSc arrays, change flags to free them as necessary. */
5954     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5955     b_oth->free_a  = PETSC_TRUE;
5956     b_oth->free_ij = PETSC_TRUE;
5957     b_oth->nonew   = 0;
5958 
5959     PetscCall(PetscFree(bufj));
5960     if (!startsj_s || !bufa_ptr) {
5961       PetscCall(PetscFree2(sstartsj, rstartsj));
5962       PetscCall(PetscFree(bufa_ptr));
5963     } else {
5964       *startsj_s = sstartsj;
5965       *startsj_r = rstartsj;
5966       *bufa_ptr  = bufa;
5967     }
5968   } else if (scall == MAT_REUSE_MATRIX) {
5969     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
5970   }
5971 
5972   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
5973   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
5974   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
5975   PetscFunctionReturn(PETSC_SUCCESS);
5976 }
5977 
5978 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5979 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5980 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
5981 #if defined(PETSC_HAVE_MKL_SPARSE)
5982 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5983 #endif
5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5985 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
5986 #if defined(PETSC_HAVE_ELEMENTAL)
5987 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
5988 #endif
5989 #if defined(PETSC_HAVE_SCALAPACK)
5990 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
5991 #endif
5992 #if defined(PETSC_HAVE_HYPRE)
5993 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
5994 #endif
5995 #if defined(PETSC_HAVE_CUDA)
5996 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
5997 #endif
5998 #if defined(PETSC_HAVE_HIP)
5999 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6000 #endif
6001 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6002 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6003 #endif
6004 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6005 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6006 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6007 
6008 /*
6009     Computes (B'*A')' since computing B*A directly is untenable
6010 
6011                n                       p                          p
6012         [             ]       [             ]         [                 ]
6013       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6014         [             ]       [             ]         [                 ]
6015 
6016 */
6017 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6018 {
6019   Mat At, Bt, Ct;
6020 
6021   PetscFunctionBegin;
6022   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6023   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6024   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6025   PetscCall(MatDestroy(&At));
6026   PetscCall(MatDestroy(&Bt));
6027   PetscCall(MatTransposeSetPrecursor(Ct, C));
6028   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6029   PetscCall(MatDestroy(&Ct));
6030   PetscFunctionReturn(PETSC_SUCCESS);
6031 }
6032 
6033 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6034 {
6035   PetscBool cisdense;
6036 
6037   PetscFunctionBegin;
6038   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6039   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6040   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6041   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6042   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6043   PetscCall(MatSetUp(C));
6044 
6045   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6046   PetscFunctionReturn(PETSC_SUCCESS);
6047 }
6048 
6049 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6050 {
6051   Mat_Product *product = C->product;
6052   Mat          A = product->A, B = product->B;
6053 
6054   PetscFunctionBegin;
6055   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6056              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6057   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6058   C->ops->productsymbolic = MatProductSymbolic_AB;
6059   PetscFunctionReturn(PETSC_SUCCESS);
6060 }
6061 
6062 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6063 {
6064   Mat_Product *product = C->product;
6065 
6066   PetscFunctionBegin;
6067   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6068   PetscFunctionReturn(PETSC_SUCCESS);
6069 }
6070 
6071 /*
6072    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6073 
6074   Input Parameters:
6075 
6076     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6077     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6078 
6079     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6080 
6081     For Set1, j1[] contains column indices of the nonzeros.
6082     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6083     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6084     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6085 
6086     Similar for Set2.
6087 
6088     This routine merges the two sets of nonzeros row by row and removes repeats.
6089 
6090   Output Parameters: (memory is allocated by the caller)
6091 
6092     i[],j[]: the CSR of the merged matrix, which has m rows.
6093     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6094     imap2[]: similar to imap1[], but for Set2.
6095     Note we order nonzeros row-by-row and from left to right.
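
    A small worked example (purely illustrative): for a single row with Set1 columns j1[] = {1,1,4}
    (jmap1[] = {0,2,3}, i.e. column 1 appears twice) and Set2 columns j2[] = {2,4} (jmap2[] = {0,1,2}),
    the merged row is j[] = {1,2,4} with i[] = {0,3}, imap1[] = {0,2} and imap2[] = {1,2}.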
6096 */
6097 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6098 {
6099   PetscInt   r, m; /* Row index of mat */
6100   PetscCount t, t1, t2, b1, e1, b2, e2;
6101 
6102   PetscFunctionBegin;
6103   PetscCall(MatGetLocalSize(mat, &m, NULL));
6104   t1 = t2 = t = 0; /* Count of unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6105   i[0]        = 0;
6106   for (r = 0; r < m; r++) { /* Do row by row merging */
6107     b1 = rowBegin1[r];
6108     e1 = rowEnd1[r];
6109     b2 = rowBegin2[r];
6110     e2 = rowEnd2[r];
6111     while (b1 < e1 && b2 < e2) {
6112       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6113         j[t]      = j1[b1];
6114         imap1[t1] = t;
6115         imap2[t2] = t;
6116         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6117         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6118         t1++;
6119         t2++;
6120         t++;
6121       } else if (j1[b1] < j2[b2]) {
6122         j[t]      = j1[b1];
6123         imap1[t1] = t;
6124         b1 += jmap1[t1 + 1] - jmap1[t1];
6125         t1++;
6126         t++;
6127       } else {
6128         j[t]      = j2[b2];
6129         imap2[t2] = t;
6130         b2 += jmap2[t2 + 1] - jmap2[t2];
6131         t2++;
6132         t++;
6133       }
6134     }
6135     /* Merge the remaining in either j1[] or j2[] */
6136     while (b1 < e1) {
6137       j[t]      = j1[b1];
6138       imap1[t1] = t;
6139       b1 += jmap1[t1 + 1] - jmap1[t1];
6140       t1++;
6141       t++;
6142     }
6143     while (b2 < e2) {
6144       j[t]      = j2[b2];
6145       imap2[t2] = t;
6146       b2 += jmap2[t2 + 1] - jmap2[t2];
6147       t2++;
6148       t++;
6149     }
6150     PetscCall(PetscIntCast(t, i + r + 1));
6151   }
6152   PetscFunctionReturn(PETSC_SUCCESS);
6153 }
6154 
6155 /*
6156   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6157 
6158   Input Parameters:
6159     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6160     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6161       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6162 
6163       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6164       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6165 
6166   Output Parameters:
6167     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6168     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6169       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6170       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6171 
6172     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6173       Atot: number of entries belonging to the diagonal block.
6174       Annz: number of unique nonzeros belonging to the diagonal block.
6175       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. The length of Aperm[] is Atot, which counts
6176         repeats (i.e., the same 'i,j' pair may appear multiple times).
6177       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6178         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6179 
6183     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6184 
6185     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
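
    A small worked example (purely illustrative): with cstart=0, cend=3 and one local row whose column indices are
    j[] = {0,0,2,5} after sorting (the two 0 entries are repeats of the same nonzero), the diagonal part is {0,0,2}
    and the off-diagonal part is {5}, so Atot = 3, Annz = 2, Ajmap[] = {0,2,3}, Btot = Bnnz = 1, Bjmap[] = {0,1},
    and Aperm[]/Bperm[] gather the corresponding perm[] values.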
6186 */
6187 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6188 {
6189   PetscInt    cstart, cend, rstart, rend, row, col;
6190   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6191   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6192   PetscCount  k, m, p, q, r, s, mid;
6193   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6194 
6195   PetscFunctionBegin;
6196   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6197   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6198   m = rend - rstart;
6199 
6200   /* Skip negative rows */
6201   for (k = 0; k < n; k++)
6202     if (i[k] >= 0) break;
6203 
6204   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6205      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6206   */
6207   while (k < n) {
6208     row = i[k];
6209     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6210     for (s = k; s < n; s++)
6211       if (i[s] != row) break;
6212 
6213     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6214     for (p = k; p < s; p++) {
6215       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6216     }
6217     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6218     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6219     rowBegin[row - rstart] = k;
6220     rowMid[row - rstart]   = mid;
6221     rowEnd[row - rstart]   = s;
6222     PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N);
6223 
6224     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6225     Atot += mid - k;
6226     Btot += s - mid;
6227 
6228     /* Count unique nonzeros of this diag row */
6229     for (p = k; p < mid;) {
6230       col = j[p];
6231       do {
6232         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6233         p++;
6234       } while (p < mid && j[p] == col);
6235       Annz++;
6236     }
6237 
6238     /* Count unique nonzeros of this offdiag row */
6239     for (p = mid; p < s;) {
6240       col = j[p];
6241       do {
6242         p++;
6243       } while (p < s && j[p] == col);
6244       Bnnz++;
6245     }
6246     k = s;
6247   }
6248 
6249   /* Allocation according to Atot, Btot, Annz, Bnnz */
6250   PetscCall(PetscMalloc1(Atot, &Aperm));
6251   PetscCall(PetscMalloc1(Btot, &Bperm));
6252   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6253   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6254 
6255   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6256   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6257   for (r = 0; r < m; r++) {
6258     k   = rowBegin[r];
6259     mid = rowMid[r];
6260     s   = rowEnd[r];
6261     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6262     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6263     Atot += mid - k;
6264     Btot += s - mid;
6265 
6266     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6267     for (p = k; p < mid;) {
6268       col = j[p];
6269       q   = p;
6270       do {
6271         p++;
6272       } while (p < mid && j[p] == col);
6273       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6274       Annz++;
6275     }
6276 
6277     for (p = mid; p < s;) {
6278       col = j[p];
6279       q   = p;
6280       do {
6281         p++;
6282       } while (p < s && j[p] == col);
6283       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6284       Bnnz++;
6285     }
6286   }
6287   /* Output */
6288   *Aperm_ = Aperm;
6289   *Annz_  = Annz;
6290   *Atot_  = Atot;
6291   *Ajmap_ = Ajmap;
6292   *Bperm_ = Bperm;
6293   *Bnnz_  = Bnnz;
6294   *Btot_  = Btot;
6295   *Bjmap_ = Bjmap;
6296   PetscFunctionReturn(PETSC_SUCCESS);
6297 }
6298 
6299 /*
6300   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6301 
6302   Input Parameters:
6303     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6304     nnz:  number of unique nonzeros in the merged matrix
6305     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6306     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6307 
6308   Output Parameter: (memory is allocated by the caller)
6309     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6310 
6311   Example:
6312     nnz1 = 4
6313     nnz  = 6
6314     imap = [1,3,4,5]
6315     jmap = [0,3,5,6,7]
6316    then,
6317     jmap_new = [0,0,3,3,5,6,7]
6318 */
6319 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6320 {
6321   PetscCount k, p;
6322 
6323   PetscFunctionBegin;
6324   jmap_new[0] = 0;
6325   p           = nnz;                /* p loops over jmap_new[] backwards */
6326   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6327     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6328   }
6329   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6330   PetscFunctionReturn(PETSC_SUCCESS);
6331 }
6332 
6333 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6334 {
6335   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6336 
6337   PetscFunctionBegin;
6338   PetscCall(PetscSFDestroy(&coo->sf));
6339   PetscCall(PetscFree(coo->Aperm1));
6340   PetscCall(PetscFree(coo->Bperm1));
6341   PetscCall(PetscFree(coo->Ajmap1));
6342   PetscCall(PetscFree(coo->Bjmap1));
6343   PetscCall(PetscFree(coo->Aimap2));
6344   PetscCall(PetscFree(coo->Bimap2));
6345   PetscCall(PetscFree(coo->Aperm2));
6346   PetscCall(PetscFree(coo->Bperm2));
6347   PetscCall(PetscFree(coo->Ajmap2));
6348   PetscCall(PetscFree(coo->Bjmap2));
6349   PetscCall(PetscFree(coo->Cperm1));
6350   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6351   PetscCall(PetscFree(coo));
6352   PetscFunctionReturn(PETSC_SUCCESS);
6353 }
6354 
6355 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6356 {
6357   MPI_Comm             comm;
6358   PetscMPIInt          rank, size;
6359   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6360   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6361   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6362   PetscContainer       container;
6363   MatCOOStruct_MPIAIJ *coo;
6364 
6365   PetscFunctionBegin;
6366   PetscCall(PetscFree(mpiaij->garray));
6367   PetscCall(VecDestroy(&mpiaij->lvec));
6368 #if defined(PETSC_USE_CTABLE)
6369   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6370 #else
6371   PetscCall(PetscFree(mpiaij->colmap));
6372 #endif
6373   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6374   mat->assembled     = PETSC_FALSE;
6375   mat->was_assembled = PETSC_FALSE;
6376 
6377   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6378   PetscCallMPI(MPI_Comm_size(comm, &size));
6379   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6380   PetscCall(PetscLayoutSetUp(mat->rmap));
6381   PetscCall(PetscLayoutSetUp(mat->cmap));
6382   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6383   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6384   PetscCall(MatGetLocalSize(mat, &m, &n));
6385   PetscCall(MatGetSize(mat, &M, &N));
6386 
6387   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6388   /* entries come first, then local rows, then remote rows.                     */
6389   PetscCount n1 = coo_n, *perm1;
6390   PetscInt  *i1 = coo_i, *j1 = coo_j;
6391 
6392   PetscCall(PetscMalloc1(n1, &perm1));
6393   for (k = 0; k < n1; k++) perm1[k] = k;
6394 
6395   /* Manipulate indices so that entries with negative row or col indices will have smallest
6396      row indices, local entries will have greater but negative row indices, and remote entries
6397      will have positive row indices.
6398   */
6399   for (k = 0; k < n1; k++) {
6400     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6401     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6402     else {
6403       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6404       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6405     }
6406   }
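       /* For example (illustrative values only, not from the original source): with rstart = 2 and rend = 4 on this
          rank, input i1[] = {5, 2, -1, 3, 0} becomes {5, 2 - PETSC_INT_MAX, PETSC_INT_MIN, 3 - PETSC_INT_MAX, 0},
          so the sort below places the ignored entry first, then local rows 2 and 3, then remote rows 0 and 5 */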
6407 
6408   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6409   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6410 
6411   /* Advance k to the first entry we need to take care of */
6412   for (k = 0; k < n1; k++)
6413     if (i1[k] > PETSC_INT_MIN) break;
6414   PetscCount i1start = k;
6415 
6416   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6417   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6418 
6419   PetscCheck(i1 == NULL || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M);
6420 
6421   /*           Send remote rows to their owner                                  */
6422   /* Find which rows should be sent to which remote ranks*/
6423   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6424   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6425   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6426   const PetscInt *ranges;
6427   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6428 
6429   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6430   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6431   for (k = rem; k < n1;) {
6432     PetscMPIInt owner;
6433     PetscInt    firstRow, lastRow;
6434 
6435     /* Locate a row range */
6436     firstRow = i1[k]; /* first row of this owner */
6437     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6438     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6439 
6440     /* Find the first index 'p' in [k,n) with i1[p] belonging to next owner */
6441     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6442 
6443     /* All entries in [k,p) belong to this remote owner */
6444     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6445       PetscMPIInt *sendto2;
6446       PetscInt    *nentries2;
6447       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6448 
6449       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6450       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6451       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6452       PetscCall(PetscFree2(sendto, nentries));
6453       sendto   = sendto2;
6454       nentries = nentries2;
6455       maxNsend = maxNsend2;
6456     }
6457     sendto[nsend] = owner;
6458     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6459     nsend++;
6460     k = p;
6461   }
6462 
6463   /* Build 1st SF to know offsets on remote to send data */
6464   PetscSF      sf1;
6465   PetscInt     nroots = 1, nroots2 = 0;
6466   PetscInt     nleaves = nsend, nleaves2 = 0;
6467   PetscInt    *offsets;
6468   PetscSFNode *iremote;
6469 
6470   PetscCall(PetscSFCreate(comm, &sf1));
6471   PetscCall(PetscMalloc1(nsend, &iremote));
6472   PetscCall(PetscMalloc1(nsend, &offsets));
6473   for (k = 0; k < nsend; k++) {
6474     iremote[k].rank  = sendto[k];
6475     iremote[k].index = 0;
6476     nleaves2 += nentries[k];
6477     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6478   }
6479   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6480   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6481   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6482   PetscCall(PetscSFDestroy(&sf1));
6483   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6484 
6485   /* Build 2nd SF to send remote COOs to their owner */
6486   PetscSF sf2;
6487   nroots  = nroots2;
6488   nleaves = nleaves2;
6489   PetscCall(PetscSFCreate(comm, &sf2));
6490   PetscCall(PetscSFSetFromOptions(sf2));
6491   PetscCall(PetscMalloc1(nleaves, &iremote));
6492   p = 0;
6493   for (k = 0; k < nsend; k++) {
6494     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6495     for (q = 0; q < nentries[k]; q++, p++) {
6496       iremote[p].rank = sendto[k];
6497       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6498     }
6499   }
6500   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6501 
6502   /* Send the remote COOs to their owner */
6503   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6504   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6505   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6506   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6507   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6508   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6509   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6510   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6511   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6512   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6513   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6514 
6515   PetscCall(PetscFree(offsets));
6516   PetscCall(PetscFree2(sendto, nentries));
6517 
6518   /* Sort received COOs by row along with the permutation array     */
6519   for (k = 0; k < n2; k++) perm2[k] = k;
6520   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6521 
6522   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6523   PetscCount *Cperm1;
6524   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6525   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6526   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6527   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6528 
6529   /* Support for HYPRE matrices, kind of a hack.
6530      Swap min column with diagonal so that diagonal values will go first */
6531   PetscBool hypre;
6532   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6533   if (hypre) {
6534     PetscInt *minj;
6535     PetscBT   hasdiag;
6536 
6537     PetscCall(PetscBTCreate(m, &hasdiag));
6538     PetscCall(PetscMalloc1(m, &minj));
6539     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6540     for (k = i1start; k < rem; k++) {
6541       if (j1[k] < cstart || j1[k] >= cend) continue;
6542       const PetscInt rindex = i1[k] - rstart;
6543       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6544       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6545     }
6546     for (k = 0; k < n2; k++) {
6547       if (j2[k] < cstart || j2[k] >= cend) continue;
6548       const PetscInt rindex = i2[k] - rstart;
6549       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6550       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6551     }
6552     for (k = i1start; k < rem; k++) {
6553       const PetscInt rindex = i1[k] - rstart;
6554       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6555       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6556       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6557     }
6558     for (k = 0; k < n2; k++) {
6559       const PetscInt rindex = i2[k] - rstart;
6560       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6561       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6562       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6563     }
6564     PetscCall(PetscBTDestroy(&hasdiag));
6565     PetscCall(PetscFree(minj));
6566   }
6567 
6568   /* Split local COOs and received COOs into diag/offdiag portions */
6569   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6570   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6571   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6572   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6573   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6574   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6575 
6576   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6577   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6578   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6579   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6580 
6581   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6582   PetscInt *Ai, *Bi;
6583   PetscInt *Aj, *Bj;
6584 
6585   PetscCall(PetscMalloc1(m + 1, &Ai));
6586   PetscCall(PetscMalloc1(m + 1, &Bi));
6587   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6588   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6589 
6590   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6591   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6592   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6593   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6594   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6595 
6596   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6597   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6598 
6599   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6600   /* expect nonzeros in A/B most likely have local contributing entries        */
6601   PetscInt    Annz = Ai[m];
6602   PetscInt    Bnnz = Bi[m];
6603   PetscCount *Ajmap1_new, *Bjmap1_new;
6604 
6605   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6606   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6607 
6608   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6609   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6610 
6611   PetscCall(PetscFree(Aimap1));
6612   PetscCall(PetscFree(Ajmap1));
6613   PetscCall(PetscFree(Bimap1));
6614   PetscCall(PetscFree(Bjmap1));
6615   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6616   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6617   PetscCall(PetscFree(perm1));
6618   PetscCall(PetscFree3(i2, j2, perm2));
6619 
6620   Ajmap1 = Ajmap1_new;
6621   Bjmap1 = Bjmap1_new;
6622 
6623   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6624   if (Annz < Annz1 + Annz2) {
6625     PetscInt *Aj_new;
6626     PetscCall(PetscMalloc1(Annz, &Aj_new));
6627     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6628     PetscCall(PetscFree(Aj));
6629     Aj = Aj_new;
6630   }
6631 
6632   if (Bnnz < Bnnz1 + Bnnz2) {
6633     PetscInt *Bj_new;
6634     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6635     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6636     PetscCall(PetscFree(Bj));
6637     Bj = Bj_new;
6638   }
6639 
6640   /* Create new submatrices for on-process and off-process coupling                  */
6641   PetscScalar     *Aa, *Ba;
6642   MatType          rtype;
6643   Mat_SeqAIJ      *a, *b;
6644   PetscObjectState state;
6645   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6646   PetscCall(PetscCalloc1(Bnnz, &Ba));
6647   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6648   if (cstart) {
6649     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6650   }
6651 
6652   PetscCall(MatGetRootType_Private(mat, &rtype));
6653 
6654   MatSeqXAIJGetOptions_Private(mpiaij->A);
6655   PetscCall(MatDestroy(&mpiaij->A));
6656   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6657   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6658   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6659 
6660   MatSeqXAIJGetOptions_Private(mpiaij->B);
6661   PetscCall(MatDestroy(&mpiaij->B));
6662   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6663   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6664   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6665 
6666   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6667   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6668   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6669   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6670 
6671   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6672   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6673   a->free_a  = PETSC_TRUE;
6674   a->free_ij = PETSC_TRUE;
6675   b->free_a  = PETSC_TRUE;
6676   b->free_ij = PETSC_TRUE;
6677   a->maxnz   = a->nz;
6678   b->maxnz   = b->nz;
6679 
6680   /* conversion must happen AFTER multiply setup */
6681   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6682   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6683   PetscCall(VecDestroy(&mpiaij->lvec));
6684   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6685 
6686   // Put the COO struct in a container and then attach that to the matrix
6687   PetscCall(PetscMalloc1(1, &coo));
6688   coo->n       = coo_n;
6689   coo->sf      = sf2;
6690   coo->sendlen = nleaves;
6691   coo->recvlen = nroots;
6692   coo->Annz    = Annz;
6693   coo->Bnnz    = Bnnz;
6694   coo->Annz2   = Annz2;
6695   coo->Bnnz2   = Bnnz2;
6696   coo->Atot1   = Atot1;
6697   coo->Atot2   = Atot2;
6698   coo->Btot1   = Btot1;
6699   coo->Btot2   = Btot2;
6700   coo->Ajmap1  = Ajmap1;
6701   coo->Aperm1  = Aperm1;
6702   coo->Bjmap1  = Bjmap1;
6703   coo->Bperm1  = Bperm1;
6704   coo->Aimap2  = Aimap2;
6705   coo->Ajmap2  = Ajmap2;
6706   coo->Aperm2  = Aperm2;
6707   coo->Bimap2  = Bimap2;
6708   coo->Bjmap2  = Bjmap2;
6709   coo->Bperm2  = Bperm2;
6710   coo->Cperm1  = Cperm1;
6711   // Allocate in preallocation. If not used, it has zero cost on host
6712   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6713   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6714   PetscCall(PetscContainerSetPointer(container, coo));
6715   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6716   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6717   PetscCall(PetscContainerDestroy(&container));
6718   PetscFunctionReturn(PETSC_SUCCESS);
6719 }
6720 
6721 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6722 {
6723   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6724   Mat                  A = mpiaij->A, B = mpiaij->B;
6725   PetscScalar         *Aa, *Ba;
6726   PetscScalar         *sendbuf, *recvbuf;
6727   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6728   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6729   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6730   const PetscCount    *Cperm1;
6731   PetscContainer       container;
6732   MatCOOStruct_MPIAIJ *coo;
6733 
6734   PetscFunctionBegin;
6735   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6736   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6737   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6738   sendbuf = coo->sendbuf;
6739   recvbuf = coo->recvbuf;
6740   Ajmap1  = coo->Ajmap1;
6741   Ajmap2  = coo->Ajmap2;
6742   Aimap2  = coo->Aimap2;
6743   Bjmap1  = coo->Bjmap1;
6744   Bjmap2  = coo->Bjmap2;
6745   Bimap2  = coo->Bimap2;
6746   Aperm1  = coo->Aperm1;
6747   Aperm2  = coo->Aperm2;
6748   Bperm1  = coo->Bperm1;
6749   Bperm2  = coo->Bperm2;
6750   Cperm1  = coo->Cperm1;
6751 
6752   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6753   PetscCall(MatSeqAIJGetArray(B, &Ba));
6754 
6755   /* Pack entries to be sent to remote */
6756   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6757 
6758   /* Send remote entries to their owner and overlap the communication with local computation */
6759   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6760   /* Add local entries to A and B */
6761   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6762     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6763     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6764     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6765   }
6766   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6767     PetscScalar sum = 0.0;
6768     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6769     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6770   }
6771   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6772 
6773   /* Add received remote entries to A and B */
6774   for (PetscCount i = 0; i < coo->Annz2; i++) {
6775     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6776   }
6777   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6778     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6779   }
6780   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6781   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6782   PetscFunctionReturn(PETSC_SUCCESS);
6783 }
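
     /*
       A minimal usage sketch of the COO assembly path implemented above (illustrative only: error
       checking is omitted, the sizes and indices are hypothetical, and each rank would normally pass
       its own subset of entries):

         Mat         A;
         PetscInt    coo_i[] = {0, 0, 3};       // global row indices of the COO entries
         PetscInt    coo_j[] = {0, 2, 1};       // global column indices of the COO entries
         PetscScalar coo_v[] = {1.0, 2.0, 3.0}; // one value per (i,j) pair; repeated pairs within a call are summed

         MatCreate(PETSC_COMM_WORLD, &A);
         MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 4, 4);
         MatSetType(A, MATMPIAIJ);
         MatSetPreallocationCOO(A, 3, coo_i, coo_j); // analyzes, splits and merges the nonzero pattern once
         MatSetValuesCOO(A, coo_v, INSERT_VALUES);   // may be called repeatedly with new values
         MatDestroy(&A);
     */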
6784 
6785 /*MC
6786    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6787 
6788    Options Database Keys:
6789 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6790 
6791    Level: beginner
6792 
6793    Notes:
6794    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6795     in this case the values associated with the rows and columns one passes in are set to zero
6796     in the matrix.
6797 
6798     `MatSetOption`(A,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6799     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
6800 
6801 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6802 M*/
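
     /*
       A typical way to end up with a MATMPIAIJ matrix (illustrative sketch only; error checking is
       omitted and M, N stand for the desired global sizes):

         Mat A;
         MatCreate(PETSC_COMM_WORLD, &A);
         MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N);
         MatSetFromOptions(A);                           // honors -mat_type mpiaij
         MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL); // rough per-row estimates for the diag/off-diag blocks
         // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
         MatDestroy(&A);
     */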
6803 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6804 {
6805   Mat_MPIAIJ *b;
6806   PetscMPIInt size;
6807 
6808   PetscFunctionBegin;
6809   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6810 
6811   PetscCall(PetscNew(&b));
6812   B->data       = (void *)b;
6813   B->ops[0]     = MatOps_Values;
6814   B->assembled  = PETSC_FALSE;
6815   B->insertmode = NOT_SET_VALUES;
6816   b->size       = size;
6817 
6818   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6819 
6820   /* build cache for off array entries formed */
6821   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6822 
6823   b->donotstash  = PETSC_FALSE;
6824   b->colmap      = NULL;
6825   b->garray      = NULL;
6826   b->roworiented = PETSC_TRUE;
6827 
6828   /* stuff used for matrix vector multiply */
6829   b->lvec  = NULL;
6830   b->Mvctx = NULL;
6831 
6832   /* stuff for MatGetRow() */
6833   b->rowindices   = NULL;
6834   b->rowvalues    = NULL;
6835   b->getrowactive = PETSC_FALSE;
6836 
6837   /* flexible pointer used in CUSPARSE classes */
6838   b->spptr = NULL;
6839 
6840   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6841   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6842   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6843   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6844   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6845   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6846   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
6847   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6848   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6849   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6850   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6851 #if defined(PETSC_HAVE_CUDA)
6852   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6853 #endif
6854 #if defined(PETSC_HAVE_HIP)
6855   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6856 #endif
6857 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6858   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6859 #endif
6860 #if defined(PETSC_HAVE_MKL_SPARSE)
6861   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6862 #endif
6863   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6864   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6865   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6866   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6867 #if defined(PETSC_HAVE_ELEMENTAL)
6868   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6869 #endif
6870 #if defined(PETSC_HAVE_SCALAPACK)
6871   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6872 #endif
6873   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6874   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6875 #if defined(PETSC_HAVE_HYPRE)
6876   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6877   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6878 #endif
6879   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6880   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6881   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6882   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6883   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6884   PetscFunctionReturn(PETSC_SUCCESS);
6885 }
6886 
6887 /*@
6888   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6889   and "off-diagonal" part of the matrix in CSR format.
6890 
6891   Collective
6892 
6893   Input Parameters:
6894 + comm - MPI communicator
6895 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6896 . n    - This value should be the same as the local size used in creating the
6897          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
6898          calculated if `N` is given). For square matrices `n` is almost always `m`.
6899 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6900 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6901 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6902 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6903 . a    - matrix values
6904 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6905 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6906 - oa   - matrix values
6907 
6908   Output Parameter:
6909 . mat - the matrix
6910 
6911   Level: advanced
6912 
6913   Notes:
6914   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6915   must free the arrays once the matrix has been destroyed and not before.
6916 
6917   The `i` and `j` indices are 0 based
6918 
6919   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6920 
6921   This sets local rows and cannot be used to set off-processor values.
6922 
6923   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6924   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6925   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6926   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6927   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6928   communication if it is known that only local entries will be set.
6929 
6930 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6931           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6932 @*/
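
     /*
       An illustrative example (not part of the original source) of the split-array inputs on two MPI
       ranks, each with m = n = 2, for the global 4x4 matrix

           [ 1 2 0 3 ]
           [ 0 4 5 0 ]
           [ 0 0 6 7 ]
           [ 8 0 0 9 ]

       where rank 0 owns rows/columns 0-1 and rank 1 owns rows/columns 2-3:

         rank 0:  i  = {0,2,3}  j  = {0,1,1}  a  = {1,2,4}   (diagonal block, local column indices)
                  oi = {0,1,2}  oj = {3,2}    oa = {3,5}     (off-diagonal block, global column indices)
         rank 1:  i  = {0,2,3}  j  = {0,1,1}  a  = {6,7,9}
                  oi = {0,0,1}  oj = {0}      oa = {8}
     */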
6933 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6934 {
6935   Mat_MPIAIJ *maij;
6936 
6937   PetscFunctionBegin;
6938   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6939   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6940   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6941   PetscCall(MatCreate(comm, mat));
6942   PetscCall(MatSetSizes(*mat, m, n, M, N));
6943   PetscCall(MatSetType(*mat, MATMPIAIJ));
6944   maij = (Mat_MPIAIJ *)(*mat)->data;
6945 
6946   (*mat)->preallocated = PETSC_TRUE;
6947 
6948   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6949   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6950 
6951   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6952   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6953 
6954   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6955   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6956   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6957   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6958   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6959   PetscFunctionReturn(PETSC_SUCCESS);
6960 }
6961 
6962 typedef struct {
6963   Mat       *mp;    /* intermediate products */
6964   PetscBool *mptmp; /* is the intermediate product temporary ? */
6965   PetscInt   cp;    /* number of intermediate products */
6966 
6967   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6968   PetscInt    *startsj_s, *startsj_r;
6969   PetscScalar *bufa;
6970   Mat          P_oth;
6971 
6972   /* may take advantage of merging product->B */
6973   Mat Bloc; /* B-local by merging diag and off-diag */
6974 
6975   /* cusparse does not have support to split between symbolic and numeric phases.
6976      When api_user is true, we don't need to update the numerical values
6977      of the temporary storage */
6978   PetscBool reusesym;
6979 
6980   /* support for COO values insertion */
6981   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6982   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
6983   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
6984   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6985   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
6986   PetscMemType mtype;
6987 
6988   /* customization */
6989   PetscBool abmerge;
6990   PetscBool P_oth_bind;
6991 } MatMatMPIAIJBACKEND;
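
     /*
       The routines below are driven through the generic MatProduct API; a minimal sketch of that API
       (illustrative only, error checking omitted) for existing MPIAIJ matrices A and P is:

         Mat C;
         MatProductCreate(A, P, NULL, &C);
         MatProductSetType(C, MATPRODUCT_PtAP); // or MATPRODUCT_AB, MATPRODUCT_AtB
         MatProductSetFromOptions(C);
         MatProductSymbolic(C);                 // for this backend, builds the intermediate products and COO maps
         MatProductNumeric(C);                  // fills the values, ending with MatSetValuesCOO()
         MatDestroy(&C);
     */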
6992 
6993 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6994 {
6995   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6996   PetscInt             i;
6997 
6998   PetscFunctionBegin;
6999   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7000   PetscCall(PetscFree(mmdata->bufa));
7001   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7002   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7003   PetscCall(MatDestroy(&mmdata->P_oth));
7004   PetscCall(MatDestroy(&mmdata->Bloc));
7005   PetscCall(PetscSFDestroy(&mmdata->sf));
7006   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7007   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7008   PetscCall(PetscFree(mmdata->own[0]));
7009   PetscCall(PetscFree(mmdata->own));
7010   PetscCall(PetscFree(mmdata->off[0]));
7011   PetscCall(PetscFree(mmdata->off));
7012   PetscCall(PetscFree(mmdata));
7013   PetscFunctionReturn(PETSC_SUCCESS);
7014 }
7015 
7016 /* Copy selected n entries with indices in idx[] of A to v[].
7017    If idx is NULL, copy the whole data array of A to v[]
7018  */
7019 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7020 {
7021   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7022 
7023   PetscFunctionBegin;
7024   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7025   if (f) {
7026     PetscCall((*f)(A, n, idx, v));
7027   } else {
7028     const PetscScalar *vv;
7029 
7030     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7031     if (n && idx) {
7032       PetscScalar    *w  = v;
7033       const PetscInt *oi = idx;
7034       PetscInt        j;
7035 
7036       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7037     } else {
7038       PetscCall(PetscArraycpy(v, vv, n));
7039     }
7040     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7041   }
7042   PetscFunctionReturn(PETSC_SUCCESS);
7043 }
7044 
7045 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7046 {
7047   MatMatMPIAIJBACKEND *mmdata;
7048   PetscInt             i, n_d, n_o;
7049 
7050   PetscFunctionBegin;
7051   MatCheckProduct(C, 1);
7052   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7053   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7054   if (!mmdata->reusesym) { /* update temporary matrices */
7055     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7056     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7057   }
7058   mmdata->reusesym = PETSC_FALSE;
7059 
7060   for (i = 0; i < mmdata->cp; i++) {
7061     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7062     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7063   }
7064   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7065     PetscInt noff;
7066 
7067     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7068     if (mmdata->mptmp[i]) continue;
7069     if (noff) {
7070       PetscInt nown;
7071 
7072       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7073       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7074       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7075       n_o += noff;
7076       n_d += nown;
7077     } else {
7078       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7079 
7080       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7081       n_d += mm->nz;
7082     }
7083   }
7084   if (mmdata->hasoffproc) { /* offprocess insertion */
7085     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7086     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7087   }
7088   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7089   PetscFunctionReturn(PETSC_SUCCESS);
7090 }
7091 
7092 /* Support for Pt * A, A * P, or Pt * A * P */
7093 #define MAX_NUMBER_INTERMEDIATE 4
7094 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7095 {
7096   Mat_Product           *product = C->product;
7097   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7098   Mat_MPIAIJ            *a, *p;
7099   MatMatMPIAIJBACKEND   *mmdata;
7100   ISLocalToGlobalMapping P_oth_l2g = NULL;
7101   IS                     glob      = NULL;
7102   const char            *prefix;
7103   char                   pprefix[256];
7104   const PetscInt        *globidx, *P_oth_idx;
7105   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7106   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7107   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7108                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7109                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7110   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7111 
7112   MatProductType ptype;
7113   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7114   PetscMPIInt    size;
7115 
7116   PetscFunctionBegin;
7117   MatCheckProduct(C, 1);
7118   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7119   ptype = product->type;
7120   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7121     ptype                                          = MATPRODUCT_AB;
7122     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7123   }
7124   switch (ptype) {
7125   case MATPRODUCT_AB:
7126     A          = product->A;
7127     P          = product->B;
7128     m          = A->rmap->n;
7129     n          = P->cmap->n;
7130     M          = A->rmap->N;
7131     N          = P->cmap->N;
7132     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7133     break;
7134   case MATPRODUCT_AtB:
7135     P          = product->A;
7136     A          = product->B;
7137     m          = P->cmap->n;
7138     n          = A->cmap->n;
7139     M          = P->cmap->N;
7140     N          = A->cmap->N;
7141     hasoffproc = PETSC_TRUE;
7142     break;
7143   case MATPRODUCT_PtAP:
7144     A          = product->A;
7145     P          = product->B;
7146     m          = P->cmap->n;
7147     n          = P->cmap->n;
7148     M          = P->cmap->N;
7149     N          = P->cmap->N;
7150     hasoffproc = PETSC_TRUE;
7151     break;
7152   default:
7153     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7154   }
7155   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7156   if (size == 1) hasoffproc = PETSC_FALSE;
7157 
7158   /* defaults */
7159   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7160     mp[i]    = NULL;
7161     mptmp[i] = PETSC_FALSE;
7162     rmapt[i] = -1;
7163     cmapt[i] = -1;
7164     rmapa[i] = NULL;
7165     cmapa[i] = NULL;
7166   }
7167 
7168   /* customization */
7169   PetscCall(PetscNew(&mmdata));
7170   mmdata->reusesym = product->api_user;
7171   if (ptype == MATPRODUCT_AB) {
7172     if (product->api_user) {
7173       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7174       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7175       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7176       PetscOptionsEnd();
7177     } else {
7178       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7179       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7180       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7181       PetscOptionsEnd();
7182     }
7183   } else if (ptype == MATPRODUCT_PtAP) {
7184     if (product->api_user) {
7185       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7186       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7187       PetscOptionsEnd();
7188     } else {
7189       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7190       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7191       PetscOptionsEnd();
7192     }
7193   }
7194   a = (Mat_MPIAIJ *)A->data;
7195   p = (Mat_MPIAIJ *)P->data;
7196   PetscCall(MatSetSizes(C, m, n, M, N));
7197   PetscCall(PetscLayoutSetUp(C->rmap));
7198   PetscCall(PetscLayoutSetUp(C->cmap));
7199   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7200   PetscCall(MatGetOptionsPrefix(C, &prefix));
7201 
7202   cp = 0;
7203   switch (ptype) {
7204   case MATPRODUCT_AB: /* A * P */
7205     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7206 
7207     /* A_diag * P_local (merged or not) */
7208     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7209       /* P is product->B */
7210       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7211       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7212       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7213       PetscCall(MatProductSetFill(mp[cp], product->fill));
7214       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7215       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7216       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7217       mp[cp]->product->api_user = product->api_user;
7218       PetscCall(MatProductSetFromOptions(mp[cp]));
7219       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7220       PetscCall(ISGetIndices(glob, &globidx));
7221       rmapt[cp] = 1;
7222       cmapt[cp] = 2;
7223       cmapa[cp] = globidx;
7224       mptmp[cp] = PETSC_FALSE;
7225       cp++;
7226     } else { /* A_diag * P_diag and A_diag * P_off */
7227       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7228       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7229       PetscCall(MatProductSetFill(mp[cp], product->fill));
7230       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7231       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7232       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7233       mp[cp]->product->api_user = product->api_user;
7234       PetscCall(MatProductSetFromOptions(mp[cp]));
7235       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7236       rmapt[cp] = 1;
7237       cmapt[cp] = 1;
7238       mptmp[cp] = PETSC_FALSE;
7239       cp++;
7240       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7241       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7242       PetscCall(MatProductSetFill(mp[cp], product->fill));
7243       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7244       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7245       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7246       mp[cp]->product->api_user = product->api_user;
7247       PetscCall(MatProductSetFromOptions(mp[cp]));
7248       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7249       rmapt[cp] = 1;
7250       cmapt[cp] = 2;
7251       cmapa[cp] = p->garray;
7252       mptmp[cp] = PETSC_FALSE;
7253       cp++;
7254     }
7255 
7256     /* A_off * P_other */
7257     if (mmdata->P_oth) {
7258       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7259       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7260       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7261       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7262       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7263       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7264       PetscCall(MatProductSetFill(mp[cp], product->fill));
7265       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7266       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7267       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7268       mp[cp]->product->api_user = product->api_user;
7269       PetscCall(MatProductSetFromOptions(mp[cp]));
7270       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7271       rmapt[cp] = 1;
7272       cmapt[cp] = 2;
7273       cmapa[cp] = P_oth_idx;
7274       mptmp[cp] = PETSC_FALSE;
7275       cp++;
7276     }
7277     break;
7278 
7279   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7280     /* A is product->B */
7281     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7282     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7283       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7284       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7285       PetscCall(MatProductSetFill(mp[cp], product->fill));
7286       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7287       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7288       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7289       mp[cp]->product->api_user = product->api_user;
7290       PetscCall(MatProductSetFromOptions(mp[cp]));
7291       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7292       PetscCall(ISGetIndices(glob, &globidx));
7293       rmapt[cp] = 2;
7294       rmapa[cp] = globidx;
7295       cmapt[cp] = 2;
7296       cmapa[cp] = globidx;
7297       mptmp[cp] = PETSC_FALSE;
7298       cp++;
7299     } else {
7300       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7301       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7302       PetscCall(MatProductSetFill(mp[cp], product->fill));
7303       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7304       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7305       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7306       mp[cp]->product->api_user = product->api_user;
7307       PetscCall(MatProductSetFromOptions(mp[cp]));
7308       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7309       PetscCall(ISGetIndices(glob, &globidx));
7310       rmapt[cp] = 1;
7311       cmapt[cp] = 2;
7312       cmapa[cp] = globidx;
7313       mptmp[cp] = PETSC_FALSE;
7314       cp++;
7315       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7316       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7317       PetscCall(MatProductSetFill(mp[cp], product->fill));
7318       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7319       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7320       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7321       mp[cp]->product->api_user = product->api_user;
7322       PetscCall(MatProductSetFromOptions(mp[cp]));
7323       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7324       rmapt[cp] = 2;
7325       rmapa[cp] = p->garray;
7326       cmapt[cp] = 2;
7327       cmapa[cp] = globidx;
7328       mptmp[cp] = PETSC_FALSE;
7329       cp++;
7330     }
7331     break;
7332   case MATPRODUCT_PtAP:
7333     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7334     /* P is product->B */
7335     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7336     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7337     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7338     PetscCall(MatProductSetFill(mp[cp], product->fill));
7339     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7340     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7341     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7342     mp[cp]->product->api_user = product->api_user;
7343     PetscCall(MatProductSetFromOptions(mp[cp]));
7344     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7345     PetscCall(ISGetIndices(glob, &globidx));
7346     rmapt[cp] = 2;
7347     rmapa[cp] = globidx;
7348     cmapt[cp] = 2;
7349     cmapa[cp] = globidx;
7350     mptmp[cp] = PETSC_FALSE;
7351     cp++;
7352     if (mmdata->P_oth) {
7353       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7354       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7355       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7356       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7357       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7358       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7359       PetscCall(MatProductSetFill(mp[cp], product->fill));
7360       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7361       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7362       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7363       mp[cp]->product->api_user = product->api_user;
7364       PetscCall(MatProductSetFromOptions(mp[cp]));
7365       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7366       mptmp[cp] = PETSC_TRUE;
7367       cp++;
7368       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7369       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7370       PetscCall(MatProductSetFill(mp[cp], product->fill));
7371       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7372       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7373       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7374       mp[cp]->product->api_user = product->api_user;
7375       PetscCall(MatProductSetFromOptions(mp[cp]));
7376       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7377       rmapt[cp] = 2;
7378       rmapa[cp] = globidx;
7379       cmapt[cp] = 2;
7380       cmapa[cp] = P_oth_idx;
7381       mptmp[cp] = PETSC_FALSE;
7382       cp++;
7383     }
7384     break;
7385   default:
7386     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7387   }
7388   /* sanity check */
7389   if (size > 1)
7390     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7391 
7392   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7393   for (i = 0; i < cp; i++) {
7394     mmdata->mp[i]    = mp[i];
7395     mmdata->mptmp[i] = mptmp[i];
7396   }
7397   mmdata->cp             = cp;
7398   C->product->data       = mmdata;
7399   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7400   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7401 
7402   /* memory type */
7403   mmdata->mtype = PETSC_MEMTYPE_HOST;
7404   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7405   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7406   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7407   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7408   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7409   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7410 
7411   /* prepare COO coordinates for value insertion */
7412 
7413   /* count the total nonzeros of the intermediate SeqAIJ matrices
7414     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7415     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted on remote procs
7416     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7417   */
7418   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7419     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7420     if (mptmp[cp]) continue;
7421     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7422       const PetscInt *rmap = rmapa[cp];
7423       const PetscInt  mr   = mp[cp]->rmap->n;
7424       const PetscInt  rs   = C->rmap->rstart;
7425       const PetscInt  re   = C->rmap->rend;
7426       const PetscInt *ii   = mm->i;
7427       for (i = 0; i < mr; i++) {
7428         const PetscInt gr = rmap[i];
7429         const PetscInt nz = ii[i + 1] - ii[i];
7430         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7431         else ncoo_oown += nz;                  /* this row is local */
7432       }
7433     } else ncoo_d += mm->nz;
7434   }
7435 
7436   /*
7437     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7438 
7439     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into this proc by other procs.
7440 
7441     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7442 
7443     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert on other procs
7444     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
7445     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other procs.
7446 
7447     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7448     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores the row indices of locally inserted nonzeros, and the remaining part stores the row indices of nonzeros this proc will receive.
7449   */
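  /*
     A small worked example of the layout above (the numbers are illustrative only): suppose cp = 2,
     mp[0] has rmapt[0] = 1 with 5 nonzeros and mp[1] has rmapt[1] = 2 with 7 nonzeros, 3 of which lie
     in rows owned by other procs. Then ncoo_d = 5, ncoo_o = 3 and ncoo_oown = 4; off[1] - off[0] = 0
     and off[2] - off[1] = 3 (only mp[1] sends anything), and likewise own[2] - own[1] = 4. If other
     procs insert ncoo2 = 2 nonzeros here, then ncoo = 5 + 4 + 2 = 11 and coo_i[]/coo_j[] hold the 9
     locally inserted entries first, followed by the 2 received ones.
  */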
7450   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7451   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7452 
7453   /* gather (i,j) of nonzeros inserted by remote procs */
7454   if (hasoffproc) {
7455     PetscSF  msf;
7456     PetscInt ncoo2, *coo_i2, *coo_j2;
7457 
7458     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7459     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7460     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7461 
7462     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7463       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7464       PetscInt   *idxoff = mmdata->off[cp];
7465       PetscInt   *idxown = mmdata->own[cp];
7466       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7467         const PetscInt *rmap = rmapa[cp];
7468         const PetscInt *cmap = cmapa[cp];
7469         const PetscInt *ii   = mm->i;
7470         PetscInt       *coi  = coo_i + ncoo_o;
7471         PetscInt       *coj  = coo_j + ncoo_o;
7472         const PetscInt  mr   = mp[cp]->rmap->n;
7473         const PetscInt  rs   = C->rmap->rstart;
7474         const PetscInt  re   = C->rmap->rend;
7475         const PetscInt  cs   = C->cmap->rstart;
7476         for (i = 0; i < mr; i++) {
7477           const PetscInt *jj = mm->j + ii[i];
7478           const PetscInt  gr = rmap[i];
7479           const PetscInt  nz = ii[i + 1] - ii[i];
7480           if (gr < rs || gr >= re) { /* this is an offproc row */
7481             for (j = ii[i]; j < ii[i + 1]; j++) {
7482               *coi++    = gr;
7483               *idxoff++ = j;
7484             }
7485             if (!cmapt[cp]) { /* already global */
7486               for (j = 0; j < nz; j++) *coj++ = jj[j];
7487             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7488               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7489             } else { /* offdiag */
7490               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7491             }
7492             ncoo_o += nz;
7493           } else { /* this is a local row */
7494             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7495           }
7496         }
7497       }
7498       mmdata->off[cp + 1] = idxoff;
7499       mmdata->own[cp + 1] = idxown;
7500     }
7501 
7502     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7503     PetscInt incoo_o;
7504     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7505     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7506     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7507     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7508     ncoo = ncoo_d + ncoo_oown + ncoo2;
7509     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7510     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7511     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7512     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7513     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7514     PetscCall(PetscFree2(coo_i, coo_j));
7515     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7516     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7517     coo_i = coo_i2;
7518     coo_j = coo_j2;
7519   } else { /* no offproc values insertion */
7520     ncoo = ncoo_d;
7521     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7522 
7523     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7524     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7525     PetscCall(PetscSFSetUp(mmdata->sf));
7526   }
7527   mmdata->hasoffproc = hasoffproc;
7528 
7529   /* gather (i,j) of nonzeros inserted locally */
7530   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7531     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7532     PetscInt       *coi  = coo_i + ncoo_d;
7533     PetscInt       *coj  = coo_j + ncoo_d;
7534     const PetscInt *jj   = mm->j;
7535     const PetscInt *ii   = mm->i;
7536     const PetscInt *cmap = cmapa[cp];
7537     const PetscInt *rmap = rmapa[cp];
7538     const PetscInt  mr   = mp[cp]->rmap->n;
7539     const PetscInt  rs   = C->rmap->rstart;
7540     const PetscInt  re   = C->rmap->rend;
7541     const PetscInt  cs   = C->cmap->rstart;
7542 
7543     if (mptmp[cp]) continue;
7544     if (rmapt[cp] == 1) { /* consecutive rows */
7545       /* fill coo_i */
7546       for (i = 0; i < mr; i++) {
7547         const PetscInt gr = i + rs;
7548         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7549       }
7550       /* fill coo_j */
7551       if (!cmapt[cp]) { /* type-0, already global */
7552         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7553       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7554         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7555       } else {                                            /* type-2, local to global for sparse columns */
7556         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7557       }
7558       ncoo_d += mm->nz;
7559     } else if (rmapt[cp] == 2) { /* sparse rows */
7560       for (i = 0; i < mr; i++) {
7561         const PetscInt *jj = mm->j + ii[i];
7562         const PetscInt  gr = rmap[i];
7563         const PetscInt  nz = ii[i + 1] - ii[i];
7564         if (gr >= rs && gr < re) { /* local rows */
7565           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7566           if (!cmapt[cp]) { /* type-0, already global */
7567             for (j = 0; j < nz; j++) *coj++ = jj[j];
7568           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7569             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7570           } else { /* type-2, local to global for sparse columns */
7571             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7572           }
7573           ncoo_d += nz;
7574         }
7575       }
7576     }
7577   }
7578   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7579   PetscCall(ISDestroy(&glob));
7580   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7581   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7582   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7583   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7584 
7585   /* set block sizes */
7586   A = product->A;
7587   P = product->B;
7588   switch (ptype) {
7589   case MATPRODUCT_PtAP:
7590     PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
7591     break;
7592   case MATPRODUCT_RARt:
7593     PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
7594     break;
7595   case MATPRODUCT_ABC:
7596     PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
7597     break;
7598   case MATPRODUCT_AB:
7599     PetscCall(MatSetBlockSizesFromMats(C, A, P));
7600     break;
7601   case MATPRODUCT_AtB:
7602     PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
7603     break;
7604   case MATPRODUCT_ABt:
7605     PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
7606     break;
7607   default:
7608     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
7609   }
7610 
7611   /* preallocate with COO data */
7612   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7613   PetscCall(PetscFree2(coo_i, coo_j));
7614   PetscFunctionReturn(PETSC_SUCCESS);
7615 }
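/*
   The routine above is the symbolic half of a generic COO assembly: it computes, once, the global (i,j)
   coordinates of every contribution and hands them to MatSetPreallocationCOO(), so the numeric phase
   (MatProductNumeric_MPIAIJBACKEND, set above) only has to supply values. A minimal sketch of that
   two-phase pattern in isolation (a standalone, single-rank example with made-up data; it is not
   PETSc's implementation of this product):

     Mat         C;
     PetscInt    coo_i[] = {0, 0, 1, 1};
     PetscInt    coo_j[] = {0, 1, 0, 1};
     PetscScalar coo_v[] = {2.0, -1.0, -1.0, 2.0};

     PetscCall(MatCreate(PETSC_COMM_SELF, &C));
     PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, 2, 2));
     PetscCall(MatSetType(C, MATAIJ));
     PetscCall(MatSetPreallocationCOO(C, 4, coo_i, coo_j)); // symbolic: the pattern is fixed once
     PetscCall(MatSetValuesCOO(C, coo_v, INSERT_VALUES));   // numeric: may be repeated with new values
     PetscCall(MatDestroy(&C));
*/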
7616 
7617 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7618 {
7619   Mat_Product *product = mat->product;
7620 #if defined(PETSC_HAVE_DEVICE)
7621   PetscBool match  = PETSC_FALSE;
7622   PetscBool usecpu = PETSC_FALSE;
7623 #else
7624   PetscBool match = PETSC_TRUE;
7625 #endif
7626 
7627   PetscFunctionBegin;
7628   MatCheckProduct(mat, 1);
7629 #if defined(PETSC_HAVE_DEVICE)
7630   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7631   if (match) { /* we can always fallback to the CPU if requested */
7632     switch (product->type) {
7633     case MATPRODUCT_AB:
7634       if (product->api_user) {
7635         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7636         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7637         PetscOptionsEnd();
7638       } else {
7639         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7640         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7641         PetscOptionsEnd();
7642       }
7643       break;
7644     case MATPRODUCT_AtB:
7645       if (product->api_user) {
7646         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7647         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7648         PetscOptionsEnd();
7649       } else {
7650         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7651         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7652         PetscOptionsEnd();
7653       }
7654       break;
7655     case MATPRODUCT_PtAP:
7656       if (product->api_user) {
7657         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7658         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7659         PetscOptionsEnd();
7660       } else {
7661         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7662         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7663         PetscOptionsEnd();
7664       }
7665       break;
7666     default:
7667       break;
7668     }
7669     match = (PetscBool)!usecpu;
7670   }
7671 #endif
7672   if (match) {
7673     switch (product->type) {
7674     case MATPRODUCT_AB:
7675     case MATPRODUCT_AtB:
7676     case MATPRODUCT_PtAP:
7677       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7678       break;
7679     default:
7680       break;
7681     }
7682   }
7683   /* fallback to MPIAIJ ops */
7684   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7685   PetscFunctionReturn(PETSC_SUCCESS);
7686 }
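/*
   For example, a code that calls MatPtAP() directly can request the CPU fallback selected above with
   the run-time option

     -matptap_backend_cpu

   while the generic MatProduct interface uses -mat_product_algorithm_backend_cpu; the corresponding
   options for MATPRODUCT_AB and MATPRODUCT_AtB are -matmatmult_backend_cpu and
   -mattransposematmult_backend_cpu (exactly the names registered in the routine above).
*/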
7687 
7688 /*
7689    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7690 
7691    n - the number of block indices in cc[]
7692    cc - the block indices (must be large enough to contain the indices)
7693 */
7694 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7695 {
7696   PetscInt        cnt = -1, nidx, j;
7697   const PetscInt *idx;
7698 
7699   PetscFunctionBegin;
7700   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7701   if (nidx) {
7702     cnt     = 0;
7703     cc[cnt] = idx[0] / bs;
7704     for (j = 1; j < nidx; j++) {
7705       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7706     }
7707   }
7708   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7709   *n = cnt + 1;
7710   PetscFunctionReturn(PETSC_SUCCESS);
7711 }
7712 
7713 /*
7714     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7715 
7716     ncollapsed - the number of block indices
7717     collapsed - the block indices (must be large enough to contain the indices)
7718 */
7719 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7720 {
7721   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7722 
7723   PetscFunctionBegin;
7724   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7725   for (i = start + 1; i < start + bs; i++) {
7726     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7727     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7728     cprevtmp = cprev;
7729     cprev    = merged;
7730     merged   = cprevtmp;
7731   }
7732   *ncollapsed = nprev;
7733   if (collapsed) *collapsed = cprev;
7734   PetscFunctionReturn(PETSC_SUCCESS);
7735 }
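/*
   For example, with bs = 2, if rows start and start + 1 collapse (via MatCollapseRow()) to the block
   indices {0, 2} and {2, 4} respectively, the result is the sorted union {0, 2, 4} with *ncollapsed = 3;
   the cprev and merged workspaces are swapped each iteration so the running union always ends up in cprev.
*/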
7736 
7737 /*
7738  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7739 
7740  Input Parameters:
7741  + Amat - matrix
7742  . symmetrize - make the result symmetric
7743  - scale - scale with diagonal
7744 
7745  Output Parameter:
7746  . a_Gmat - output scalar graph (values >= 0)
7747 
7748 */
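/*
   A caller-side sketch (the variable names are hypothetical): build the scalar connectivity graph of a
   blocked matrix, symmetrized and diagonally scaled, dropping entries below 0.01 and using all
   rows/columns of each block:

     Mat Gmat;
     PetscCall(MatCreateGraph_Simple_AIJ(Amat, PETSC_TRUE, PETSC_TRUE, 0.01, 0, NULL, &Gmat));
     // ... use Gmat, e.g. to drive aggregation/coarsening ...
     PetscCall(MatDestroy(&Gmat));
*/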
7749 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7750 {
7751   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7752   MPI_Comm  comm;
7753   Mat       Gmat;
7754   PetscBool ismpiaij, isseqaij;
7755   Mat       a, b, c;
7756   MatType   jtype;
7757 
7758   PetscFunctionBegin;
7759   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7760   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7761   PetscCall(MatGetSize(Amat, &MM, &NN));
7762   PetscCall(MatGetBlockSize(Amat, &bs));
7763   nloc = (Iend - Istart) / bs;
7764 
7765   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7766   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7767   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7768 
7769   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7770   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class could provide a fast
7771      implementation */
7772   if (bs > 1) {
7773     PetscCall(MatGetType(Amat, &jtype));
7774     PetscCall(MatCreate(comm, &Gmat));
7775     PetscCall(MatSetType(Gmat, jtype));
7776     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7777     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7778     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7779       PetscInt  *d_nnz, *o_nnz;
7780       MatScalar *aa, val, *AA;
7781       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7782 
7783       if (isseqaij) {
7784         a = Amat;
7785         b = NULL;
7786       } else {
7787         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7788         a             = d->A;
7789         b             = d->B;
7790       }
7791       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7792       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7793       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7794         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7795         const PetscInt *cols1, *cols2;
7796 
7797         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7798           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7799           nnz[brow / bs] = nc2 / bs;
7800           if (nc2 % bs) ok = 0;
7801           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7802           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7803             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7804             if (nc1 != nc2) ok = 0;
7805             else {
7806               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7807                 if (cols1[jj] != cols2[jj]) ok = 0;
7808                 if (cols1[jj] % bs != jj % bs) ok = 0;
7809               }
7810             }
7811             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7812           }
7813           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7814           if (!ok) {
7815             PetscCall(PetscFree2(d_nnz, o_nnz));
7816             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7817             goto old_bs;
7818           }
7819         }
7820       }
7821       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7822       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7823       PetscCall(PetscFree2(d_nnz, o_nnz));
7824       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7825       // diag
7826       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7827         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7828 
7829         ai = aseq->i;
7830         n  = ai[brow + 1] - ai[brow];
7831         aj = aseq->j + ai[brow];
7832         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7833           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7834           val        = 0;
7835           if (index_size == 0) {
7836             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7837               aa = aseq->a + ai[brow + ii] + k;
7838               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7839                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7840               }
7841             }
7842           } else {                                            // use (index,index) value if provided
7843             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7844               PetscInt ii = index[iii];
7845               aa          = aseq->a + ai[brow + ii] + k;
7846               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7847                 PetscInt jj = index[jjj];
7848                 val += PetscAbs(PetscRealPart(aa[jj]));
7849               }
7850             }
7851           }
7852           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7853           AA[k / bs] = val;
7854         }
7855         grow = Istart / bs + brow / bs;
7856         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7857       }
7858       // off-diag
7859       if (ismpiaij) {
7860         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7861         const PetscScalar *vals;
7862         const PetscInt    *cols, *garray = aij->garray;
7863 
7864         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7865         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7866           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7867           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7868             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7869             AA[k / bs] = 0;
7870             AJ[cidx]   = garray[cols[k]] / bs;
7871           }
7872           nc = ncols / bs;
7873           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7874           if (index_size == 0) {
7875             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7876               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7877               for (PetscInt k = 0; k < ncols; k += bs) {
7878                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7879                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7880                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7881                 }
7882               }
7883               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7884             }
7885           } else {                                            // use (index,index) value if provided
7886             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7887               PetscInt ii = index[iii];
7888               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7889               for (PetscInt k = 0; k < ncols; k += bs) {
7890                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7891                   PetscInt jj = index[jjj];
7892                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7893                 }
7894               }
7895               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7896             }
7897           }
7898           grow = Istart / bs + brow / bs;
7899           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7900         }
7901       }
7902       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7903       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7904       PetscCall(PetscFree2(AA, AJ));
7905     } else {
7906       const PetscScalar *vals;
7907       const PetscInt    *idx;
7908       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7909     old_bs:
7910       /*
7911        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7912        */
7913       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7914       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7915       if (isseqaij) {
7916         PetscInt max_d_nnz;
7917 
7918         /*
7919          Determine exact preallocation count for (sequential) scalar matrix
7920          */
7921         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7922         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7923         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7924         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7925         PetscCall(PetscFree3(w0, w1, w2));
7926       } else if (ismpiaij) {
7927         Mat             Daij, Oaij;
7928         const PetscInt *garray;
7929         PetscInt        max_d_nnz;
7930 
7931         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7932         /*
7933          Determine exact preallocation count for diagonal block portion of scalar matrix
7934          */
7935         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7936         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7937         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7938         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7939         PetscCall(PetscFree3(w0, w1, w2));
7940         /*
7941          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7942          */
7943         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7944           o_nnz[jj] = 0;
7945           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7946             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7947             o_nnz[jj] += ncols;
7948             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7949           }
7950           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7951         }
7952       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7953       /* get scalar copy (norms) of matrix */
7954       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7955       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7956       PetscCall(PetscFree2(d_nnz, o_nnz));
7957       for (Ii = Istart; Ii < Iend; Ii++) {
7958         PetscInt dest_row = Ii / bs;
7959 
7960         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7961         for (jj = 0; jj < ncols; jj++) {
7962           PetscInt    dest_col = idx[jj] / bs;
7963           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7964 
7965           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7966         }
7967         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7968       }
7969       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7970       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7971     }
7972   } else {
7973     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7974     else {
7975       Gmat = Amat;
7976       PetscCall(PetscObjectReference((PetscObject)Gmat));
7977     }
7978     if (isseqaij) {
7979       a = Gmat;
7980       b = NULL;
7981     } else {
7982       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7983       a             = d->A;
7984       b             = d->B;
7985     }
7986     if (filter >= 0 || scale) {
7987       /* take absolute value of each entry */
7988       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7989         MatInfo      info;
7990         PetscScalar *avals;
7991 
7992         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7993         PetscCall(MatSeqAIJGetArray(c, &avals));
7994         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7995         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7996       }
7997     }
7998   }
7999   if (symmetrize) {
8000     PetscBool isset, issym;
8001 
8002     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8003     if (!isset || !issym) {
8004       Mat matTrans;
8005 
8006       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8007       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8008       PetscCall(MatDestroy(&matTrans));
8009     }
8010     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8011   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8012   if (scale) {
8013     /* scale Gmat so that all diagonal values equal 1 or -1 */
8014     Vec diag;
8015 
8016     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8017     PetscCall(MatGetDiagonal(Gmat, diag));
8018     PetscCall(VecReciprocal(diag));
8019     PetscCall(VecSqrtAbs(diag));
8020     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8021     PetscCall(VecDestroy(&diag));
8022   }
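  /*
     Concretely, the scaling above applies d_i = 1/sqrt(|G_ii|) on both sides, i.e.
     G_ij <- G_ij / sqrt(|G_ii| |G_jj|), so every diagonal entry of the scaled graph becomes +1 or -1
     (assuming no zero diagonal entries).
  */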
8023   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8024   if (filter >= 0) {
8025     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8026     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8027   }
8028   *a_Gmat = Gmat;
8029   PetscFunctionReturn(PETSC_SUCCESS);
8030 }
8031 
8032 PETSC_INTERN PetscErrorCode MatGetCurrentMemType_MPIAIJ(Mat A, PetscMemType *memtype)
8033 {
8034   Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
8035   PetscMemType mD = PETSC_MEMTYPE_HOST, mO = PETSC_MEMTYPE_HOST;
8036 
8037   PetscFunctionBegin;
8038   if (mpiaij->A) PetscCall(MatGetCurrentMemType(mpiaij->A, &mD));
8039   if (mpiaij->B) PetscCall(MatGetCurrentMemType(mpiaij->B, &mO));
8040   *memtype = (mD == mO) ? mD : PETSC_MEMTYPE_HOST;
8041   PetscFunctionReturn(PETSC_SUCCESS);
8042 }
8043 
8044 /*
8045     Special version for direct calls from Fortran
8046 */
8047 
8048 /* Change these macros so they can be used in a void function */
8049 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8050 #undef PetscCall
8051 #define PetscCall(...) \
8052   do { \
8053     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8054     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8055       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8056       return; \
8057     } \
8058   } while (0)
8059 
8060 #undef SETERRQ
8061 #define SETERRQ(comm, ierr, ...) \
8062   do { \
8063     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8064     return; \
8065   } while (0)
8066 
8067 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8068   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8069 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8070   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8071 #else
8072 #endif
8073 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8074 {
8075   Mat         mat = *mmat;
8076   PetscInt    m = *mm, n = *mn;
8077   InsertMode  addv = *maddv;
8078   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8079   PetscScalar value;
8080 
8081   MatCheckPreallocated(mat, 1);
8082   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8083   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8084   {
8085     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8086     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8087     PetscBool roworiented = aij->roworiented;
8088 
8089     /* Some Variables required in the macro */
8090     Mat         A     = aij->A;
8091     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8092     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8093     MatScalar  *aa;
8094     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8095     Mat         B                 = aij->B;
8096     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8097     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8098     MatScalar  *ba;
8099     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8100      * cannot use "#if defined" inside a macro. */
8101     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8102 
8103     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8104     PetscInt   nonew = a->nonew;
8105     MatScalar *ap1, *ap2;
8106 
8107     PetscFunctionBegin;
8108     PetscCall(MatSeqAIJGetArray(A, &aa));
8109     PetscCall(MatSeqAIJGetArray(B, &ba));
8110     for (i = 0; i < m; i++) {
8111       if (im[i] < 0) continue;
8112       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8113       if (im[i] >= rstart && im[i] < rend) {
8114         row      = im[i] - rstart;
8115         lastcol1 = -1;
8116         rp1      = aj + ai[row];
8117         ap1      = aa + ai[row];
8118         rmax1    = aimax[row];
8119         nrow1    = ailen[row];
8120         low1     = 0;
8121         high1    = nrow1;
8122         lastcol2 = -1;
8123         rp2      = bj + bi[row];
8124         ap2      = ba + bi[row];
8125         rmax2    = bimax[row];
8126         nrow2    = bilen[row];
8127         low2     = 0;
8128         high2    = nrow2;
8129 
8130         for (j = 0; j < n; j++) {
8131           if (roworiented) value = v[i * n + j];
8132           else value = v[i + j * m];
8133           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8134           if (in[j] >= cstart && in[j] < cend) {
8135             col = in[j] - cstart;
8136             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8137           } else if (in[j] < 0) continue;
8138           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8139             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8140           } else {
8141             if (mat->was_assembled) {
8142               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8143 #if defined(PETSC_USE_CTABLE)
8144               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8145               col--;
8146 #else
8147               col = aij->colmap[in[j]] - 1;
8148 #endif
8149               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8150                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8151                 col = in[j];
8152                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8153                 B        = aij->B;
8154                 b        = (Mat_SeqAIJ *)B->data;
8155                 bimax    = b->imax;
8156                 bi       = b->i;
8157                 bilen    = b->ilen;
8158                 bj       = b->j;
8159                 rp2      = bj + bi[row];
8160                 ap2      = ba + bi[row];
8161                 rmax2    = bimax[row];
8162                 nrow2    = bilen[row];
8163                 low2     = 0;
8164                 high2    = nrow2;
8165                 bm       = aij->B->rmap->n;
8166                 ba       = b->a;
8167                 inserted = PETSC_FALSE;
8168               }
8169             } else col = in[j];
8170             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8171           }
8172         }
8173       } else if (!aij->donotstash) {
8174         if (roworiented) {
8175           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8176         } else {
8177           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8178         }
8179       }
8180     }
8181     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8182     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8183   }
8184   PetscFunctionReturnVoid();
8185 }
8186 
8187 /* Undefining these here since they were redefined from their original definition above! No
8188  * other PETSc functions should be defined past this point, as it is impossible to recover the
8189  * original definitions */
8190 #undef PetscCall
8191 #undef SETERRQ
8192