xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 21e3ffae2f3b73c0bd738cf6d0a809700fc04bb0)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14 #if defined(PETSC_USE_LOG)
15   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
16 #endif
17   PetscCall(MatStashDestroy_Private(&mat->stash));
18   PetscCall(VecDestroy(&aij->diag));
19   PetscCall(MatDestroy(&aij->A));
20   PetscCall(MatDestroy(&aij->B));
21 #if defined(PETSC_USE_CTABLE)
22   PetscCall(PetscHMapIDestroy(&aij->colmap));
23 #else
24   PetscCall(PetscFree(aij->colmap));
25 #endif
26   PetscCall(PetscFree(aij->garray));
27   PetscCall(VecDestroy(&aij->lvec));
28   PetscCall(VecScatterDestroy(&aij->Mvctx));
29   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
30   PetscCall(PetscFree(aij->ld));
31 
32   /* Free COO */
33   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
34 
35   PetscCall(PetscFree(mat->data));
36 
37   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
38   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
39 
40   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
50 #if defined(PETSC_HAVE_CUDA)
51   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
52 #endif
53 #if defined(PETSC_HAVE_HIP)
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
55 #endif
56 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
57   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
58 #endif
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
60 #if defined(PETSC_HAVE_ELEMENTAL)
61   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
62 #endif
63 #if defined(PETSC_HAVE_SCALAPACK)
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
65 #endif
66 #if defined(PETSC_HAVE_HYPRE)
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
69 #endif
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
76 #if defined(PETSC_HAVE_MKL_SPARSE)
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
78 #endif
79   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
80   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
82   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
84   PetscFunctionReturn(PETSC_SUCCESS);
85 }
86 
87 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
88 #define TYPE AIJ
89 #define TYPE_AIJ
90 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
91 #undef TYPE
92 #undef TYPE_AIJ
93 
94 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
95 {
96   Mat B;
97 
98   PetscFunctionBegin;
99   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
100   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
101   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
102   PetscCall(MatDestroy(&B));
103   PetscFunctionReturn(PETSC_SUCCESS);
104 }
105 
106 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
107 {
108   Mat B;
109 
110   PetscFunctionBegin;
111   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
112   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
113   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 /*MC
118    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
119 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
121    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
122   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
123   for communicators controlling multiple processes.  It is recommended that you call both of
124   the above preallocation routines for simplicity.
125 
126    Options Database Keys:
127 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
128 
129   Developer Note:
    Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; this type also automatically switches over to use inodes when
131    enough exist.
132 
133   Level: beginner
134 
135 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
136 M*/
137 
138 /*MC
139    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
140 
141    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
142    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
143    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
144   for communicators controlling multiple processes.  It is recommended that you call both of
145   the above preallocation routines for simplicity.
146 
147    Options Database Keys:
148 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
149 
150   Level: beginner
151 
152 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
153 M*/
154 
155 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
156 {
157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
158 
159   PetscFunctionBegin;
160 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
161   A->boundtocpu = flg;
162 #endif
163   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
164   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
165 
166   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
167    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
168    * to differ from the parent matrix. */
169   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
170   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
171 
172   PetscFunctionReturn(PETSC_SUCCESS);
173 }
174 
175 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
176 {
177   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
178 
179   PetscFunctionBegin;
180   if (mat->A) {
181     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
182     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
183   }
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
/*
  MatFindNonzeroRows_MPIAIJ - Creates an IS containing, in global numbering, the
  locally owned rows of M that have at least one stored entry with a nonzero
  value (in either the diagonal block A or the off-diagonal block B).

  If no process has an empty row, *keptrows is left NULL, meaning "keep all rows".

  Collective: calls MPIU_Allreduce() to count empty rows across the communicator.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* First pass: count locally empty rows, i.e. rows with no stored entries or
     whose stored values are all exactly zero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++; /* structurally empty row */
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* found a nonzero: row is kept */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* all stored values in this row are zero */
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    /* no empty rows anywhere: leave *keptrows NULL (all rows kept) */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* Second pass: collect the global indices of the locally nonempty rows */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}
255 
256 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
257 {
258   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
259   PetscBool   cong;
260 
261   PetscFunctionBegin;
262   PetscCall(MatHasCongruentLayouts(Y, &cong));
263   if (Y->assembled && cong) {
264     PetscCall(MatDiagonalSet(aij->A, D, is));
265   } else {
266     PetscCall(MatDiagonalSet_Default(Y, D, is));
267   }
268   PetscFunctionReturn(PETSC_SUCCESS);
269 }
270 
271 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
272 {
273   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
274   PetscInt    i, rstart, nrows, *rows;
275 
276   PetscFunctionBegin;
277   *zrows = NULL;
278   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
279   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
280   for (i = 0; i < nrows; i++) rows[i] += rstart;
281   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
282   PetscFunctionReturn(PETSC_SUCCESS);
283 }
284 
/*
  MatGetColumnReductions_MPIAIJ - Computes a per-column reduction (1-, 2- or
  infinity-norm, or sum/mean of the real or imaginary parts) over all rows.

  reductions must have length n (global number of columns); every process gets
  the full result.  Note the work array also has global length n, so this
  routine is not memory-scalable in the number of columns.

  Collective: calls MPIU_Allreduce().
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray; /* garray: local off-diagonal column -> global column */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* Get/restore pairs with an unused result: presumably these force any device-side
     values to be synced to the host before a_aij->a / b_aij->a are read directly
     below -- NOTE(review): confirm this is the intent */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* |a*a| == |a|^2 also for complex scalars, so this accumulates squared magnitudes */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the local contributions: max for the infinity norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean over the global number of rows */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
330 
331 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
332 {
333   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
334   IS              sis, gis;
335   const PetscInt *isis, *igis;
336   PetscInt        n, *iis, nsis, ngis, rstart, i;
337 
338   PetscFunctionBegin;
339   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
340   PetscCall(MatFindNonzeroRows(a->B, &gis));
341   PetscCall(ISGetSize(gis, &ngis));
342   PetscCall(ISGetSize(sis, &nsis));
343   PetscCall(ISGetIndices(sis, &isis));
344   PetscCall(ISGetIndices(gis, &igis));
345 
346   PetscCall(PetscMalloc1(ngis + nsis, &iis));
347   PetscCall(PetscArraycpy(iis, igis, ngis));
348   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
349   n = ngis + nsis;
350   PetscCall(PetscSortRemoveDupsInt(&n, iis));
351   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
352   for (i = 0; i < n; i++) iis[i] += rstart;
353   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
354 
355   PetscCall(ISRestoreIndices(sis, &isis));
356   PetscCall(ISRestoreIndices(gis, &igis));
357   PetscCall(ISDestroy(&sis));
358   PetscCall(ISDestroy(&gis));
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* number of off-diagonal columns in use */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash map from global column id to local index;
     both key and value are stored shifted by +1 so that 0 can mean "absent" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* non-scalable variant: dense array over all N global columns,
     zero-initialized so an entry of 0 means "column not present locally" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
385 
/*
  MatSetValues_SeqAIJ_A_Private - Inlined insertion of one (row,col,value) entry
  into the diagonal block A of an MPIAIJ matrix.  Expects the caller
  (MatSetValues_MPIAIJ) to have set up rp1/ap1 (the row's column-index and value
  arrays), nrow1, rmax1, the binary-search window low1/high1, lastcol1, nonew and
  ignorezeroentries.  Performs a narrowed binary search followed by a linear
  scan; when a new nonzero must be inserted it reallocates the row via
  MatSeqXAIJReallocateAIJ() and shifts the later entries up.  orow/ocol are the
  original (global) indices, used only for error messages.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }
431 
/*
  MatSetValues_SeqAIJ_B_Private - Same as MatSetValues_SeqAIJ_A_Private but for
  the off-diagonal block B, using the rp2/ap2/nrow2/rmax2/low2/high2/lastcol2
  state variables.  Note: unlike the A variant, a zero value is dropped (when
  ignorezeroentries is set) regardless of row == col, since B holds no diagonal
  entries.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }
476 
/*
  MatSetValuesRow_MPIAIJ - Overwrites all stored values of one locally owned row.

  row is the global row index.  v supplies one value per stored entry of the row,
  ordered by global column: first the off-diagonal entries left of the diagonal
  block, then the diagonal-block entries, then the off-diagonal entries to the
  right.  No new nonzero locations are created.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  /* l = number of B entries whose global column (via garray) precedes the diagonal
     block; uses the row-ownership start as the column cutoff, which relies on the
     square-matrix assumption above */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    /* left-of-diagonal part: the first l values of v */
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
514 
/*
  MatSetValues_MPIAIJ - Inserts or adds an m-by-n logically dense block of values
  (global indices im[]/in[], negative indices skipped) into the parallel matrix.

  Locally owned entries are routed to the diagonal block A or the off-diagonal
  block B through the MatSetValues_SeqAIJ_{A,B}_Private() macros, which operate
  on the rp*/ap*/nrow*/low*/high* state variables declared below.  Entries for
  off-process rows are buffered in the stash and communicated during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row index: skip */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the search windows for this row in both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column owned locally: goes into the diagonal block A */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative column index: skip */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal nonzero but B's structure is frozen: skip or error per nonew */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for communication at assembly time */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
623 
624 /*
625     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
626     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
628 */
629 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
630 {
631   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
632   Mat         A      = aij->A; /* diagonal part of the matrix */
633   Mat         B      = aij->B; /* offdiagonal part of the matrix */
634   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
635   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
636   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
637   PetscInt   *ailen = a->ilen, *aj = a->j;
638   PetscInt   *bilen = b->ilen, *bj = b->j;
639   PetscInt    am          = aij->A->rmap->n, j;
640   PetscInt    diag_so_far = 0, dnz;
641   PetscInt    offd_so_far = 0, onz;
642 
643   PetscFunctionBegin;
644   /* Iterate over all rows of the matrix */
645   for (j = 0; j < am; j++) {
646     dnz = onz = 0;
647     /*  Iterate over all non-zero columns of the current row */
648     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
649       /* If column is in the diagonal */
650       if (mat_j[col] >= cstart && mat_j[col] < cend) {
651         aj[diag_so_far++] = mat_j[col] - cstart;
652         dnz++;
653       } else { /* off-diagonal entries */
654         bj[offd_so_far++] = mat_j[col];
655         onz++;
656       }
657     }
658     ailen[j] = dnz;
659     bilen[j] = onz;
660   }
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 /*
665     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
666     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
668     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
669     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
670 */
671 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
672 {
673   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
674   Mat          A    = aij->A; /* diagonal part of the matrix */
675   Mat          B    = aij->B; /* offdiagonal part of the matrix */
676   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
677   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
678   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
679   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
680   PetscInt    *ailen = a->ilen, *aj = a->j;
681   PetscInt    *bilen = b->ilen, *bj = b->j;
682   PetscInt     am          = aij->A->rmap->n, j;
683   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
684   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
685   PetscScalar *aa = a->a, *ba = b->a;
686 
687   PetscFunctionBegin;
688   /* Iterate over all rows of the matrix */
689   for (j = 0; j < am; j++) {
690     dnz_row = onz_row = 0;
691     rowstart_offd     = full_offd_i[j];
692     rowstart_diag     = full_diag_i[j];
693     /*  Iterate over all non-zero columns of the current row */
694     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
695       /* If column is in the diagonal */
696       if (mat_j[col] >= cstart && mat_j[col] < cend) {
697         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
698         aa[rowstart_diag + dnz_row] = mat_a[col];
699         dnz_row++;
700       } else { /* off-diagonal entries */
701         bj[rowstart_offd + onz_row] = mat_j[col];
702         ba[rowstart_offd + onz_row] = mat_a[col];
703         onz_row++;
704       }
705     }
706     ailen[j] = dnz_row;
707     bilen[j] = onz_row;
708   }
709   PetscFunctionReturn(PETSC_SUCCESS);
710 }
711 
/*
  Fetches entries (idxm x idxn) of the matrix into v (row-major, m x n).
  Only rows owned by this rank are supported; negative row/column indices are skipped.
  Diagonal-block columns are read from aij->A; other columns are looked up in the
  colmap (global column -> local off-diagonal column, stored +1 so 0 means "absent")
  and read from aij->B, or reported as 0.0 when not present in the sparsity pattern.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row index */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: translate global column to local via the colmap */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--; /* colmap stores col+1; a miss yields 0, hence col becomes -1 */
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray check guards against a stale colmap entry after disassembly */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
746 
747 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
748 {
749   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
750   PetscInt    nstash, reallocs;
751 
752   PetscFunctionBegin;
753   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
754 
755   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
756   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
757   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
758   PetscFunctionReturn(PETSC_SUCCESS);
759 }
760 
/*
  Completes assembly: receives stashed off-process entries, assembles both
  sequential blocks, handles global disassembly agreement, and (on first
  final assembly) builds the scatter used for matrix-vector products.
  Collective: contains MPI reductions, so all ranks must call it.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  /* Drain incoming stash messages and insert their entries locally */
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: set up the ghost-value scatter (lvec/Mvctx) for MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* row-access workspace from MatGetRow is stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is invalidated by the new values */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
839 
840 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
841 {
842   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
843 
844   PetscFunctionBegin;
845   PetscCall(MatZeroEntries(l->A));
846   PetscCall(MatZeroEntries(l->B));
847   PetscFunctionReturn(PETSC_SUCCESS);
848 }
849 
/*
  Zeros the given (global) rows, optionally placing `diag` on the diagonal and
  adjusting the right-hand side b so that x remains the solution in those rows.
  Collective: the rows list may reference rows owned by other ranks, and the
  nonzerostate update is a reduction.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i in every zeroed row keeps x consistent with the modified system */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember block nonzero states so we can detect a pattern change afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the local diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the nonew flags; they are temporarily cleared to allow inserting the diagonal */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rows beyond the last column have no diagonal entry */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original insertion-error behavior */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0.0: just zero the rows in both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}
923 
/*
  Zeros the given (global) rows AND the matching columns, placing `diag` on the
  diagonal (handled inside the diagonal block). A star forest routes the
  possibly non-local row list to the owning ranks; a scattered 0/1 mask marks
  the ghost columns to remove from the off-diagonal block. Collective.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* NOTE(review): narrows PetscInt to PetscMPIInt; assumes the local row count fits in an int — confirm */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not zeroed" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed; rows[] values are >= 0, so MPI_LOR turns any hit non-negative */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix: build a 0/1 mask of zeroed columns in ghost numbering */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* fetch ghost values of x so b can be corrected for removed columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed storage: only rows with nonzeros are listed, via rindex */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* move the known contribution of the removed column to the rhs */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1041 
1042 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1043 {
1044   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1045   PetscInt    nt;
1046   VecScatter  Mvctx = a->Mvctx;
1047 
1048   PetscFunctionBegin;
1049   PetscCall(VecGetLocalSize(xx, &nt));
1050   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1051   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1052   PetscUseTypeMethod(a->A, mult, xx, yy);
1053   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1054   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1055   PetscFunctionReturn(PETSC_SUCCESS);
1056 }
1057 
1058 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1059 {
1060   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1064   PetscFunctionReturn(PETSC_SUCCESS);
1065 }
1066 
1067 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1068 {
1069   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1070   VecScatter  Mvctx = a->Mvctx;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1074   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1075   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1076   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1077   PetscFunctionReturn(PETSC_SUCCESS);
1078 }
1079 
1080 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1081 {
1082   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1083 
1084   PetscFunctionBegin;
1085   /* do nondiagonal part */
1086   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1087   /* do local part */
1088   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1089   /* add partial results together */
1090   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1091   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1092   PetscFunctionReturn(PETSC_SUCCESS);
1093 }
1094 
/*
  Tests whether Bmat equals Amat^T within tolerance tol; the collective result
  is returned in f on all ranks. First checks the diagonal blocks cheaply, then
  compares the gathered off-diagonal pieces via submatrix extraction.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); /* uniprocessor: the diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* NOTE(review): notme is allocated with N - last + first entries but the loops below
     write first + (M - last) entries; this relies on M == N (square matrix) — confirm */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  /* extract the mirrored block of Bmat: (Notme, Me) against Amat's (Me, Notme) */
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1134 
1135 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1136 {
1137   PetscFunctionBegin;
1138   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1139   PetscFunctionReturn(PETSC_SUCCESS);
1140 }
1141 
1142 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1143 {
1144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1145 
1146   PetscFunctionBegin;
1147   /* do nondiagonal part */
1148   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1149   /* do local part */
1150   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1151   /* add partial results together */
1152   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1153   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1154   PetscFunctionReturn(PETSC_SUCCESS);
1155 }
1156 
1157 /*
1158   This only works correctly for square matrices where the subblock A->A is the
1159    diagonal block
1160 */
1161 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1162 {
1163   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1164 
1165   PetscFunctionBegin;
1166   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1167   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1168   PetscCall(MatGetDiagonal(a->A, v));
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1173 {
1174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1175 
1176   PetscFunctionBegin;
1177   PetscCall(MatScale(a->A, aa));
1178   PetscCall(MatScale(a->B, aa));
1179   PetscFunctionReturn(PETSC_SUCCESS);
1180 }
1181 
1182 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1183 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1184 {
1185   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1186 
1187   PetscFunctionBegin;
1188   PetscCall(PetscSFDestroy(&aij->coo_sf));
1189   PetscCall(PetscFree(aij->Aperm1));
1190   PetscCall(PetscFree(aij->Bperm1));
1191   PetscCall(PetscFree(aij->Ajmap1));
1192   PetscCall(PetscFree(aij->Bjmap1));
1193 
1194   PetscCall(PetscFree(aij->Aimap2));
1195   PetscCall(PetscFree(aij->Bimap2));
1196   PetscCall(PetscFree(aij->Aperm2));
1197   PetscCall(PetscFree(aij->Bperm2));
1198   PetscCall(PetscFree(aij->Ajmap2));
1199   PetscCall(PetscFree(aij->Bjmap2));
1200 
1201   PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
1202   PetscCall(PetscFree(aij->Cperm1));
1203   PetscFunctionReturn(PETSC_SUCCESS);
1204 }
1205 
1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1207 {
1208   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1209   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1210   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1211   const PetscInt    *garray = aij->garray;
1212   const PetscScalar *aa, *ba;
1213   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1214   PetscInt          *rowlens;
1215   PetscInt          *colidxs;
1216   PetscScalar       *matvals;
1217 
1218   PetscFunctionBegin;
1219   PetscCall(PetscViewerSetUp(viewer));
1220 
1221   M  = mat->rmap->N;
1222   N  = mat->cmap->N;
1223   m  = mat->rmap->n;
1224   rs = mat->rmap->rstart;
1225   cs = mat->cmap->rstart;
1226   nz = A->nz + B->nz;
1227 
1228   /* write matrix header */
1229   header[0] = MAT_FILE_CLASSID;
1230   header[1] = M;
1231   header[2] = N;
1232   header[3] = nz;
1233   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1234   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1235 
1236   /* fill in and store row lengths  */
1237   PetscCall(PetscMalloc1(m, &rowlens));
1238   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1239   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1240   PetscCall(PetscFree(rowlens));
1241 
1242   /* fill in and store column indices */
1243   PetscCall(PetscMalloc1(nz, &colidxs));
1244   for (cnt = 0, i = 0; i < m; i++) {
1245     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1246       if (garray[B->j[jb]] > cs) break;
1247       colidxs[cnt++] = garray[B->j[jb]];
1248     }
1249     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1250     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1251   }
1252   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1254   PetscCall(PetscFree(colidxs));
1255 
1256   /* fill in and store nonzero values */
1257   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1258   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1259   PetscCall(PetscMalloc1(nz, &matvals));
1260   for (cnt = 0, i = 0; i < m; i++) {
1261     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1262       if (garray[B->j[jb]] > cs) break;
1263       matvals[cnt++] = ba[jb];
1264     }
1265     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1266     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1267   }
1268   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1269   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1270   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1271   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1272   PetscCall(PetscFree(matvals));
1273 
1274   /* write block size option to the viewer's .info file */
1275   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278 
1279 #include <petscdraw.h>
/*
  Viewer dispatch for ASCII, draw, binary and socket viewers. Info-style ASCII
  formats and binary output are handled directly; otherwise the whole matrix is
  gathered onto rank 0 and viewed there via a sub-viewer. Collective: every
  rank must reach the gather/sub-viewer section for the synchronization to work.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank storage statistics, printed synchronized */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocessor: the diagonal block is the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns, every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1405 
1406 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1407 {
1408   PetscBool iascii, isdraw, issocket, isbinary;
1409 
1410   PetscFunctionBegin;
1411   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1412   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1413   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1414   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1415   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1416   PetscFunctionReturn(PETSC_SUCCESS);
1417 }
1418 
/* Block-Jacobi style SOR: each sweep relaxes only the on-process diagonal block A,
   with the off-process coupling B folded into the right-hand side via the ghost
   values of the current iterate. Truly parallel (global) SOR is not supported. */
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector: rhs corrected by off-process contributions */
  PetscBool   hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER is delegated entirely to the local diagonal block */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed whenever a sweep sees a nonzero iterate: more than one outer
     iteration, a nonzero initial guess (~flag & ... tests the bit being absent),
     or the Eisenstat variant */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero initial guess the first sweep needs no ghost-value correction */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      /* gather ghost values of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      /* gather ghost values of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward half-sweep into xx */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily create and cache the diagonal, needed to form D*x below */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) D*x  -- Eisenstat's trick scaling */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    /* add in the off-process coupling using the gathered ghost values */
    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/factorization error detected by the local sweeps */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1515 
/* Forms B = P*A*Q for row permutation P (rowp) and column permutation Q (colp).
   The inverse permutations are computed with PetscSF reductions so that each rank
   learns the new global index of its rows, columns, and ghost columns, then the
   entries are shipped to their new owners with MatSetValues(). */
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* NOTE(review): never assigned in this function; the trailing ISDestroy() looks like dead code -- confirm against history */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is scratch for both row- and column-sized data, hence PetscMax(m, n) */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros per (old) row, classified by the new owners */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the per-row counts to the ranks that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1621 
1622 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1623 {
1624   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1625 
1626   PetscFunctionBegin;
1627   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1628   if (ghosts) *ghosts = aij->garray;
1629   PetscFunctionReturn(PETSC_SUCCESS);
1630 }
1631 
1632 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1633 {
1634   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1635   Mat            A = mat->A, B = mat->B;
1636   PetscLogDouble isend[5], irecv[5];
1637 
1638   PetscFunctionBegin;
1639   info->block_size = 1.0;
1640   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1641 
1642   isend[0] = info->nz_used;
1643   isend[1] = info->nz_allocated;
1644   isend[2] = info->nz_unneeded;
1645   isend[3] = info->memory;
1646   isend[4] = info->mallocs;
1647 
1648   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1649 
1650   isend[0] += info->nz_used;
1651   isend[1] += info->nz_allocated;
1652   isend[2] += info->nz_unneeded;
1653   isend[3] += info->memory;
1654   isend[4] += info->mallocs;
1655   if (flag == MAT_LOCAL) {
1656     info->nz_used      = isend[0];
1657     info->nz_allocated = isend[1];
1658     info->nz_unneeded  = isend[2];
1659     info->memory       = isend[3];
1660     info->mallocs      = isend[4];
1661   } else if (flag == MAT_GLOBAL_MAX) {
1662     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1663 
1664     info->nz_used      = irecv[0];
1665     info->nz_allocated = irecv[1];
1666     info->nz_unneeded  = irecv[2];
1667     info->memory       = irecv[3];
1668     info->mallocs      = irecv[4];
1669   } else if (flag == MAT_GLOBAL_SUM) {
1670     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1671 
1672     info->nz_used      = irecv[0];
1673     info->nz_allocated = irecv[1];
1674     info->nz_unneeded  = irecv[2];
1675     info->memory       = irecv[3];
1676     info->mallocs      = irecv[4];
1677   }
1678   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1679   info->fill_ratio_needed = 0;
1680   info->factor_mallocs    = 0;
1681   PetscFunctionReturn(PETSC_SUCCESS);
1682 }
1683 
/* Dispatches matrix options: most are forwarded to both sequential blocks, a few are
   stored on the parallel layer itself, and some are no-ops at this level. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to the diagonal (A) and off-diagonal (B) blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* also recorded here so MatSetValues_MPIAIJ() interprets input blocks correctly */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process entries passed to MatSetValues() are dropped instead of stashed */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1737 
/* Returns one locally owned row in global numbering by merging the row of the
   diagonal block A with the row of the off-diagonal block B. The merged output
   is sorted by global column because B's columns split into those below the
   diagonal column range (cstart) and those above it. Buffers are owned by the
   matrix and recycled; the caller must pair this with MatRestoreRow(). */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* pass NULL for any output the caller did not request so the blocks skip that work */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B-columns whose global index is below cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A, then the remaining B entries */
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* imark not computed above (v was NULL): find the split point now */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A columns are local, so shift by cstart; B columns map through garray */
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1821 
1822 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1823 {
1824   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1825 
1826   PetscFunctionBegin;
1827   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1828   aij->getrowactive = PETSC_FALSE;
1829   PetscFunctionReturn(PETSC_SUCCESS);
1830 }
1831 
/* Computes Frobenius, 1- (max column sum), or infinity- (max row sum) norm by
   combining the two sequential blocks and reducing across ranks. The 2-norm is
   not supported. */
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  /* single-rank case: everything lives in the diagonal block */
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate per-global-column absolute sums, then reduce and take the max */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        /* diagonal-block columns are local: offset by cstart to get the global column */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* off-diagonal columns map to global numbering through garray */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across ranks, so a local row-sum max plus one reduction suffices */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1909 
/* Forms the explicit transpose. The diagonal block is transposed locally in place
   (fast path); the off-diagonal block's entries are sent to their new owning rows
   with MatSetValues(). Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and the
   in-place case (*matout == A, finished with MatHeaderMerge). */
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* the transpose has A's column layout as rows and row layout as columns */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    /* reusing a matrix with the transposed pattern: any new nonzero is a logic error */
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed off-diagonal columns to global numbering */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* insert row i of B as column `row` of the transpose (note swapped row/col arguments) */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's innards with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2003 
2004 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
2005 {
2006   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2007   Mat         a = aij->A, b = aij->B;
2008   PetscInt    s1, s2, s3;
2009 
2010   PetscFunctionBegin;
2011   PetscCall(MatGetLocalSize(mat, &s2, &s3));
2012   if (rr) {
2013     PetscCall(VecGetLocalSize(rr, &s1));
2014     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
2015     /* Overlap communication with computation. */
2016     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2017   }
2018   if (ll) {
2019     PetscCall(VecGetLocalSize(ll, &s1));
2020     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2021     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
2022   }
2023   /* scale  the diagonal block */
2024   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2025 
2026   if (rr) {
2027     /* Do a scatter end and then right scale the off-diagonal block */
2028     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2029     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2030   }
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2035 {
2036   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2037 
2038   PetscFunctionBegin;
2039   PetscCall(MatSetUnfactored(a->A));
2040   PetscFunctionReturn(PETSC_SUCCESS);
2041 }
2042 
2043 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2044 {
2045   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2046   Mat         a, b, c, d;
2047   PetscBool   flg;
2048 
2049   PetscFunctionBegin;
2050   a = matA->A;
2051   b = matA->B;
2052   c = matB->A;
2053   d = matB->B;
2054 
2055   PetscCall(MatEqual(a, c, &flg));
2056   if (flg) PetscCall(MatEqual(b, d, &flg));
2057   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2058   PetscFunctionReturn(PETSC_SUCCESS);
2059 }
2060 
2061 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2062 {
2063   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2064   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2065 
2066   PetscFunctionBegin;
2067   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2068   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2069     /* because of the column compression in the off-processor part of the matrix a->B,
2070        the number of columns in a->B and b->B may be different, hence we cannot call
2071        the MatCopy() directly on the two parts. If need be, we can provide a more
2072        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2073        then copying the submatrices */
2074     PetscCall(MatCopy_Basic(A, B, str));
2075   } else {
2076     PetscCall(MatCopy(a->A, b->A, str));
2077     PetscCall(MatCopy(a->B, b->B, str));
2078   }
2079   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2080   PetscFunctionReturn(PETSC_SUCCESS);
2081 }
2082 
2083 /*
2084    Computes the number of nonzeros per row needed for preallocation when X and Y
2085    have different nonzero structure.
2086 */
2087 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2088 {
2089   PetscInt i, j, k, nzx, nzy;
2090 
2091   PetscFunctionBegin;
2092   /* Set the number of nonzeros in the new matrix */
2093   for (i = 0; i < m; i++) {
2094     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2095     nzx    = xi[i + 1] - xi[i];
2096     nzy    = yi[i + 1] - yi[i];
2097     nnz[i] = 0;
2098     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2099       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2100       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2101       nnz[i]++;
2102     }
2103     for (; k < nzy; k++) nnz[i]++;
2104   }
2105   PetscFunctionReturn(PETSC_SUCCESS);
2106 }
2107 
2108 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2109 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2110 {
2111   PetscInt    m = Y->rmap->N;
2112   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2113   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2114 
2115   PetscFunctionBegin;
2116   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2117   PetscFunctionReturn(PETSC_SUCCESS);
2118 }
2119 
/* Y = a*X + Y. Fast path when the patterns match; otherwise a new matrix with the
   union pattern is preallocated, filled, and swapped into Y via MatHeaderMerge(). */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical patterns: combine the blocks directly */
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* note: for the sequential blocks rmap->N equals the local row count */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    /* preallocate for the union of the two nonzero patterns, block by block */
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's innards with B's; B itself is consumed */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2150 
2151 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2152 
2153 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2154 {
2155   PetscFunctionBegin;
2156   if (PetscDefined(USE_COMPLEX)) {
2157     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2158 
2159     PetscCall(MatConjugate_SeqAIJ(aij->A));
2160     PetscCall(MatConjugate_SeqAIJ(aij->B));
2161   }
2162   PetscFunctionReturn(PETSC_SUCCESS);
2163 }
2164 
2165 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2166 {
2167   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2168 
2169   PetscFunctionBegin;
2170   PetscCall(MatRealPart(a->A));
2171   PetscCall(MatRealPart(a->B));
2172   PetscFunctionReturn(PETSC_SUCCESS);
2173 }
2174 
2175 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2176 {
2177   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2178 
2179   PetscFunctionBegin;
2180   PetscCall(MatImaginaryPart(a->A));
2181   PetscCall(MatImaginaryPart(a->B));
2182   PetscFunctionReturn(PETSC_SUCCESS);
2183 }
2184 
2185 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2186 {
2187   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2188   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2189   PetscScalar       *va, *vv;
2190   Vec                vB, vA;
2191   const PetscScalar *vb;
2192 
2193   PetscFunctionBegin;
2194   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2195   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2196 
2197   PetscCall(VecGetArrayWrite(vA, &va));
2198   if (idx) {
2199     for (i = 0; i < m; i++) {
2200       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2201     }
2202   }
2203 
2204   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2205   PetscCall(PetscMalloc1(m, &idxb));
2206   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2207 
2208   PetscCall(VecGetArrayWrite(v, &vv));
2209   PetscCall(VecGetArrayRead(vB, &vb));
2210   for (i = 0; i < m; i++) {
2211     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2212       vv[i] = vb[i];
2213       if (idx) idx[i] = a->garray[idxb[i]];
2214     } else {
2215       vv[i] = va[i];
2216       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2217     }
2218   }
2219   PetscCall(VecRestoreArrayWrite(vA, &vv));
2220   PetscCall(VecRestoreArrayWrite(vA, &va));
2221   PetscCall(VecRestoreArrayRead(vB, &vb));
2222   PetscCall(PetscFree(idxb));
2223   PetscCall(VecDestroy(&vA));
2224   PetscCall(VecDestroy(&vB));
2225   PetscFunctionReturn(PETSC_SUCCESS);
2226 }
2227 
/* Compute v[r] = min over columns c of |A[r,c]|, treating columns with no stored entry
   as implicit zeros; if idx[] is non-NULL, idx[r] returns the global column attaining the
   minimum (ties broken toward the smaller global column). */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B; /* off-diagonal block, stored with compressed columns */
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate to the SeqAIJ kernel, writing straight into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns: every row's min-abs is the implicit 0.0, with no attaining column */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the row min-abs starts at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): this search compares the global column against the loop index j —
         inherited logic; verify against the companion SeqAIJ routine before changing */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of this B row for a smaller magnitude */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal-block results; ties to the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2343 
/* Compute v[r] = min over columns c of A[r,c] (real part), treating columns with no stored
   entry as implicit zeros; if idx[] is non-NULL, idx[r] returns the global column attaining
   the minimum (ties broken toward the smaller global column). */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B; /* off-diagonal block, stored with compressed columns */
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate to the SeqAIJ kernel, writing straight into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns: mark every row as having no minimum (sentinel value and index) */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the row minimum is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): this search compares the global column against the loop index j —
         inherited logic; verify against the companion SeqAIJ routine before changing */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of this B row for a smaller (real-part) value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal-block results; ties to the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2459 
/* Compute v[r] = max over columns c of A[r,c] (real part), treating columns with no stored
   entry as implicit zeros; if idx[] is non-NULL, idx[r] returns the global column attaining
   the maximum (ties broken toward the smaller global column). */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B; /* off-diagonal block, stored with compressed columns */
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate to the SeqAIJ kernel, writing straight into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns: mark every row as having no maximum (sentinel value and index) */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): this search compares the global column against the loop index j —
         inherited logic; verify against the companion SeqAIJ routine before changing */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of this B row for a larger (real-part) value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal-block results; ties to the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2575 
2576 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2577 {
2578   Mat *dummy;
2579 
2580   PetscFunctionBegin;
2581   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2582   *newmat = *dummy;
2583   PetscCall(PetscFree(dummy));
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2588 {
2589   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2590 
2591   PetscFunctionBegin;
2592   PetscCall(MatInvertBlockDiagonal(a->A, values));
2593   A->factorerrortype = a->A->factorerrortype;
2594   PetscFunctionReturn(PETSC_SUCCESS);
2595 }
2596 
2597 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2598 {
2599   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2600 
2601   PetscFunctionBegin;
2602   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2603   PetscCall(MatSetRandom(aij->A, rctx));
2604   if (x->assembled) {
2605     PetscCall(MatSetRandom(aij->B, rctx));
2606   } else {
2607     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2608   }
2609   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2610   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2611   PetscFunctionReturn(PETSC_SUCCESS);
2612 }
2613 
2614 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2615 {
2616   PetscFunctionBegin;
2617   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2618   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2619   PetscFunctionReturn(PETSC_SUCCESS);
2620 }
2621 
2622 /*@
2623    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2624 
2625    Not collective
2626 
2627    Input Parameter:
2628 .    A - the matrix
2629 
2630    Output Parameter:
2631 .    nz - the number of nonzeros
2632 
2633  Level: advanced
2634 
2635 .seealso: `MATMPIAIJ`, `Mat`
2636 @*/
2637 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2638 {
2639   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2640   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2641 
2642   PetscFunctionBegin;
2643   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2644   PetscFunctionReturn(PETSC_SUCCESS);
2645 }
2646 
2647 /*@
2648    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2649 
2650    Collective
2651 
2652    Input Parameters:
2653 +    A - the matrix
2654 -    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2655 
2656  Level: advanced
2657 
2658 @*/
2659 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2660 {
2661   PetscFunctionBegin;
2662   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2663   PetscFunctionReturn(PETSC_SUCCESS);
2664 }
2665 
2666 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2667 {
2668   PetscBool sc = PETSC_FALSE, flg;
2669 
2670   PetscFunctionBegin;
2671   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2672   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2673   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2674   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2675   PetscOptionsHeadEnd();
2676   PetscFunctionReturn(PETSC_SUCCESS);
2677 }
2678 
/* Y += a*I. Ensures the diagonal block is preallocated (one entry per row) before shifting. */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* Nothing preallocated yet: reserve exactly one (diagonal) entry per row */
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    /* Save and restore the nonew flag around the preallocation call, which would
       otherwise clobber the user's setting */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2695 
2696 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2697 {
2698   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2699 
2700   PetscFunctionBegin;
2701   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2702   PetscCall(MatMissingDiagonal(a->A, missing, d));
2703   if (d) {
2704     PetscInt rstart;
2705     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2706     *d += rstart;
2707   }
2708   PetscFunctionReturn(PETSC_SUCCESS);
2709 }
2710 
2711 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2712 {
2713   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2714 
2715   PetscFunctionBegin;
2716   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2717   PetscFunctionReturn(PETSC_SUCCESS);
2718 }
2719 
2720 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
2721 {
2722   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2723 
2724   PetscFunctionBegin;
2725   PetscCall(MatEliminateZeros(a->A));
2726   PetscCall(MatEliminateZeros(a->B));
2727   PetscFunctionReturn(PETSC_SUCCESS);
2728 }
2729 
2730 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ. Slot positions are fixed by struct _MatOps
   (the /\*nn*\/ comments give the slot index); NULL entries either have no MPIAIJ
   implementation or are filled in elsewhere. Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};
2883 
2884 /* ----------------------------------------------------------------------------------------*/
2885 
2886 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2887 {
2888   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2889 
2890   PetscFunctionBegin;
2891   PetscCall(MatStoreValues(aij->A));
2892   PetscCall(MatStoreValues(aij->B));
2893   PetscFunctionReturn(PETSC_SUCCESS);
2894 }
2895 
2896 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2897 {
2898   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2899 
2900   PetscFunctionBegin;
2901   PetscCall(MatRetrieveValues(aij->A));
2902   PetscCall(MatRetrieveValues(aij->B));
2903   PetscFunctionReturn(PETSC_SUCCESS);
2904 }
2905 
/* (Re)creates and preallocates the diagonal (b->A) and off-diagonal (b->B) SeqAIJ blocks.
   d_nz/d_nnz and o_nz/o_nnz follow the MatMPIAIJSetPreallocation() conventions. Any existing
   assembly state, column map, and communication structures are discarded. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* Leave hash-based MatSetValues mode by restoring the cached ops table */
    PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* Discard the off-process column mapping and communication state; rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Recreate the off-diagonal block; on a single rank it has zero columns (everything is diagonal) */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* Recreate the diagonal block: local rows x local columns */
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  /* A fresh preallocation invalidates any previous assembly */
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2948 
2949 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2950 {
2951   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2952 
2953   PetscFunctionBegin;
2954   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2955   PetscCall(PetscLayoutSetUp(B->rmap));
2956   PetscCall(PetscLayoutSetUp(B->cmap));
2957 
2958 #if defined(PETSC_USE_CTABLE)
2959   PetscCall(PetscHMapIDestroy(&b->colmap));
2960 #else
2961   PetscCall(PetscFree(b->colmap));
2962 #endif
2963   PetscCall(PetscFree(b->garray));
2964   PetscCall(VecDestroy(&b->lvec));
2965   PetscCall(VecScatterDestroy(&b->Mvctx));
2966 
2967   PetscCall(MatResetPreallocation(b->A));
2968   PetscCall(MatResetPreallocation(b->B));
2969   B->preallocated  = PETSC_TRUE;
2970   B->was_assembled = PETSC_FALSE;
2971   B->assembled     = PETSC_FALSE;
2972   PetscFunctionReturn(PETSC_SUCCESS);
2973 }
2974 
/*
  MatDuplicate_MPIAIJ - implementation of MatDuplicate() for MATMPIAIJ.

  Creates a new parallel matrix with the same layout as matin, copies the
  bookkeeping state, duplicates the colmap/garray/lvec/Mvctx helper objects
  when they exist, and duplicates the two sequential blocks (honoring
  cpvalues for whether numerical values are copied).

  Input Parameters:
+ matin    - matrix to duplicate
- cpvalues - MAT_COPY_VALUES or MAT_DO_NOT_COPY_VALUES (forwarded to the block duplications)

  Output Parameter:
. newmat - the duplicate
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  /* copy assembly/preallocation state; insertmode starts fresh */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch space is not copied; it is allocated on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* global-to-local column map: hash table or dense array depending on build option */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* garray maps local off-diagonal column indices to global column indices */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3030 
3031 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3032 {
3033   PetscBool isbinary, ishdf5;
3034 
3035   PetscFunctionBegin;
3036   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3037   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3038   /* force binary viewer to load .info file if it has not yet done so */
3039   PetscCall(PetscViewerSetUp(viewer));
3040   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3041   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3042   if (isbinary) {
3043     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3044   } else if (ishdf5) {
3045 #if defined(PETSC_HAVE_HDF5)
3046     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3047 #else
3048     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3049 #endif
3050   } else {
3051     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3052   }
3053   PetscFunctionReturn(PETSC_SUCCESS);
3054 }
3055 
/*
  MatLoad_MPIAIJ_Binary - reads a MATMPIAIJ matrix from a PETSc binary viewer.

  The binary format starts with a 4-entry header (classid, M, N, nnz),
  followed by the per-row nonzero counts, the column indices, and the values.
  All reads of the bulk data use collective PetscViewerBinaryReadAll(), so
  this routine must be called by all ranks of the matrix's communicator.

  Input Parameters:
+ mat    - the matrix to fill (sizes may be unset; they are then taken from the file)
- viewer - binary viewer positioned at a matrix object
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format that MPIAIJ cannot read */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  /* prefix-sum converts per-row counts into CSR row offsets */
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the summed row lengths across ranks must equal the header's nnz */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3105 
3106 /* Not scalable because of ISAllGather() unless getting all columns. */
3107 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3108 {
3109   IS          iscol_local;
3110   PetscBool   isstride;
3111   PetscMPIInt lisstride = 0, gisstride;
3112 
3113   PetscFunctionBegin;
3114   /* check if we are grabbing all columns*/
3115   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3116 
3117   if (isstride) {
3118     PetscInt start, len, mstart, mlen;
3119     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3120     PetscCall(ISGetLocalSize(iscol, &len));
3121     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3122     if (mstart == start && mlen - mstart == len) lisstride = 1;
3123   }
3124 
3125   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3126   if (gisstride) {
3127     PetscInt N;
3128     PetscCall(MatGetSize(mat, NULL, &N));
3129     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3130     PetscCall(ISSetIdentity(iscol_local));
3131     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3132   } else {
3133     PetscInt cbs;
3134     PetscCall(ISGetBlockSize(iscol, &cbs));
3135     PetscCall(ISAllGather(iscol, &iscol_local));
3136     PetscCall(ISSetBlockSize(iscol_local, cbs));
3137   }
3138 
3139   *isseq = iscol_local;
3140   PetscFunctionReturn(PETSC_SUCCESS);
3141 }
3142 
3143 /*
3144  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3145  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3146 
3147  Input Parameters:
3148    mat - matrix
3149    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3150            i.e., mat->rstart <= isrow[i] < mat->rend
3151    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3152            i.e., mat->cstart <= iscol[i] < mat->cend
3153  Output Parameter:
3154    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3155    iscol_o - sequential column index set for retrieving mat->B
3156    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3157  */
/*
  ISGetSeqIS_SameColDist_Private - builds the sequential index sets needed to
  extract a submatrix when isrow/iscol have the same distribution as mat
  (see the block comment above for the full parameter contract).

  Strategy: encode the selected columns into two parallel vectors (x holds the
  selected global column index, cmap holds the column's position in the
  submatrix), scatter them with mat's existing Mvctx to obtain the off-process
  portions, then read off the selected off-diagonal columns locally.
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of local iscol sizes gives this
     rank's first column position within the submatrix */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d: the selected columns expressed as local indices into mat->A,
     carrying over iscol's block size (idx ownership transfers to the IS) */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: selected rows expressed as local indices (global minus rstart) */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: an entry > -1 in the scattered lvec marks a
     selected column; -1 entries are the padding from step (1) */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* cmap1 ownership passes to the caller via *garray (freed by the caller) */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3254 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/*
  MatCreateSubMatrix_MPIAIJ_SameRowColDist - extracts a submatrix when both isrow
  and iscol are distributed like mat itself, so everything can be done with
  purely local sub-extractions of the A and B blocks.

  For MAT_INITIAL_MATRIX the routine builds the sequential index sets, extracts
  Asub/Bsub, assembles them into M, and composes the index sets onto M so a
  later MAT_REUSE_MATRIX call can refresh the values in place.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    /* skip the off-diagonal update when no off-process columns are selected */
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; Asub and Bsub are consumed by this call */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* walk both sorted garrays, keeping only iscol_o entries whose global
         column survived assembly (i.e. appears in the condensed subgarray) */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3347 
/*
  MatCreateSubMatrix_MPIAIJ - implementation of MatCreateSubMatrix() for MATMPIAIJ.

  Chooses among three extraction strategies, preferring the ones that avoid
  gathering iscol globally:
    1. isrow AND iscol distributed like mat  -> MatCreateSubMatrix_MPIAIJ_SameRowColDist()
    2. only isrow distributed like mat       -> MatCreateSubMatrix_MPIAIJ_SameRowDist()
    3. general case                          -> gather iscol via ISGetSeqIS_Private()
       and call the nonscalable path.

  For MAT_REUSE_MATRIX the strategy is recovered from objects composed onto
  *newmat by the previous MAT_INITIAL_MATRIX call ("isrow_d", "SubIScol",
  "ISAllGather"), so the same path is taken again.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat:
       empty local selections trivially qualify; otherwise all selected rows
       must lie in this rank's ownership range */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* LAND-reduce: each property must hold on every rank to be usable */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local falls through to the general case below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered IS on the submatrix so a reuse call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3447 
/*@C
     MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced

   Notes:
   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

   A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local (diagonal-block) column counts */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* translate B's local column indices to global ones, in place, via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat: a new SeqAIJ header that aliases
     B's (now globally-indexed) i/j/a arrays, with the full global column width */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* hand the array ownership from B to Bnew, then drop the old B header */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B: assembly rebuilds the compact off-diagonal
     representation; NO_OFF_PROC_ENTRIES avoids a needless stash reduction */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3541 
3542 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3543 
3544 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3545 {
3546   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3547   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3548   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3549   Mat             M, Msub, B = a->B;
3550   MatScalar      *aa;
3551   Mat_SeqAIJ     *aij;
3552   PetscInt       *garray = a->garray, *colsub, Ncols;
3553   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3554   IS              iscol_sub, iscmap;
3555   const PetscInt *is_idx, *cmap;
3556   PetscBool       allcolumns = PETSC_FALSE;
3557   MPI_Comm        comm;
3558 
3559   PetscFunctionBegin;
3560   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3561   if (call == MAT_REUSE_MATRIX) {
3562     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3563     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3564     PetscCall(ISGetLocalSize(iscol_sub, &count));
3565 
3566     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3567     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3568 
3569     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3570     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3571 
3572     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3573 
3574   } else { /* call == MAT_INITIAL_MATRIX) */
3575     PetscBool flg;
3576 
3577     PetscCall(ISGetLocalSize(iscol, &n));
3578     PetscCall(ISGetSize(iscol, &Ncols));
3579 
3580     /* (1) iscol -> nonscalable iscol_local */
3581     /* Check for special case: each processor gets entire matrix columns */
3582     PetscCall(ISIdentity(iscol_local, &flg));
3583     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3584     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3585     if (allcolumns) {
3586       iscol_sub = iscol_local;
3587       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3588       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3589 
3590     } else {
3591       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3592       PetscInt *idx, *cmap1, k;
3593       PetscCall(PetscMalloc1(Ncols, &idx));
3594       PetscCall(PetscMalloc1(Ncols, &cmap1));
3595       PetscCall(ISGetIndices(iscol_local, &is_idx));
3596       count = 0;
3597       k     = 0;
3598       for (i = 0; i < Ncols; i++) {
3599         j = is_idx[i];
3600         if (j >= cstart && j < cend) {
3601           /* diagonal part of mat */
3602           idx[count]     = j;
3603           cmap1[count++] = i; /* column index in submat */
3604         } else if (Bn) {
3605           /* off-diagonal part of mat */
3606           if (j == garray[k]) {
3607             idx[count]     = j;
3608             cmap1[count++] = i; /* column index in submat */
3609           } else if (j > garray[k]) {
3610             while (j > garray[k] && k < Bn - 1) k++;
3611             if (j == garray[k]) {
3612               idx[count]     = j;
3613               cmap1[count++] = i; /* column index in submat */
3614             }
3615           }
3616         }
3617       }
3618       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3619 
3620       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3621       PetscCall(ISGetBlockSize(iscol, &cbs));
3622       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3623 
3624       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3625     }
3626 
3627     /* (3) Create sequential Msub */
3628     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3629   }
3630 
3631   PetscCall(ISGetLocalSize(iscol_sub, &count));
3632   aij = (Mat_SeqAIJ *)(Msub)->data;
3633   ii  = aij->i;
3634   PetscCall(ISGetIndices(iscmap, &cmap));
3635 
3636   /*
3637       m - number of local rows
3638       Ncols - number of columns (same on all processors)
3639       rstart - first row in new global matrix generated
3640   */
3641   PetscCall(MatGetSize(Msub, &m, NULL));
3642 
3643   if (call == MAT_INITIAL_MATRIX) {
3644     /* (4) Create parallel newmat */
3645     PetscMPIInt rank, size;
3646     PetscInt    csize;
3647 
3648     PetscCallMPI(MPI_Comm_size(comm, &size));
3649     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3650 
3651     /*
3652         Determine the number of non-zeros in the diagonal and off-diagonal
3653         portions of the matrix in order to do correct preallocation
3654     */
3655 
3656     /* first get start and end of "diagonal" columns */
3657     PetscCall(ISGetLocalSize(iscol, &csize));
3658     if (csize == PETSC_DECIDE) {
3659       PetscCall(ISGetSize(isrow, &mglobal));
3660       if (mglobal == Ncols) { /* square matrix */
3661         nlocal = m;
3662       } else {
3663         nlocal = Ncols / size + ((Ncols % size) > rank);
3664       }
3665     } else {
3666       nlocal = csize;
3667     }
3668     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3669     rstart = rend - nlocal;
3670     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3671 
3672     /* next, compute all the lengths */
3673     jj = aij->j;
3674     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3675     olens = dlens + m;
3676     for (i = 0; i < m; i++) {
3677       jend = ii[i + 1] - ii[i];
3678       olen = 0;
3679       dlen = 0;
3680       for (j = 0; j < jend; j++) {
3681         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3682         else dlen++;
3683         jj++;
3684       }
3685       olens[i] = olen;
3686       dlens[i] = dlen;
3687     }
3688 
3689     PetscCall(ISGetBlockSize(isrow, &bs));
3690     PetscCall(ISGetBlockSize(iscol, &cbs));
3691 
3692     PetscCall(MatCreate(comm, &M));
3693     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3694     PetscCall(MatSetBlockSizes(M, bs, cbs));
3695     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3696     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3697     PetscCall(PetscFree(dlens));
3698 
3699   } else { /* call == MAT_REUSE_MATRIX */
3700     M = *newmat;
3701     PetscCall(MatGetLocalSize(M, &i, NULL));
3702     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3703     PetscCall(MatZeroEntries(M));
3704     /*
3705          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3706        rather than the slower MatSetValues().
3707     */
3708     M->was_assembled = PETSC_TRUE;
3709     M->assembled     = PETSC_FALSE;
3710   }
3711 
3712   /* (5) Set values of Msub to *newmat */
3713   PetscCall(PetscMalloc1(count, &colsub));
3714   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3715 
3716   jj = aij->j;
3717   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3718   for (i = 0; i < m; i++) {
3719     row = rstart + i;
3720     nz  = ii[i + 1] - ii[i];
3721     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3722     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3723     jj += nz;
3724     aa += nz;
3725   }
3726   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3727   PetscCall(ISRestoreIndices(iscmap, &cmap));
3728 
3729   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3730   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3731 
3732   PetscCall(PetscFree(colsub));
3733 
3734   /* save Msub, iscol_sub and iscmap used in processor for next request */
3735   if (call == MAT_INITIAL_MATRIX) {
3736     *newmat = M;
3737     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3738     PetscCall(MatDestroy(&Msub));
3739 
3740     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3741     PetscCall(ISDestroy(&iscol_sub));
3742 
3743     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3744     PetscCall(ISDestroy(&iscmap));
3745 
3746     if (iscol_local) {
3747       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3748       PetscCall(ISDestroy(&iscol_local));
3749     }
3750   }
3751   PetscFunctionReturn(PETSC_SUCCESS);
3752 }
3753 
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  on each process, and then the end result is formed by concatenating the
  local matrices. Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().

  This requires a sequential iscol with all indices.
*/
/* Extracts the parallel submatrix mat[isrow, iscol] into *newmat.  Each rank first pulls
   its rows into a sequential matrix (Mreuse) and then the pieces are concatenated into a
   new MPIAIJ matrix; csize (or PETSC_DECIDE) sets the local column count of the result.
   With MAT_INITIAL_MATRIX the sequential piece is cached on *newmat (key "SubMatrix")
   so a later MAT_REUSE_MATRIX call can refill it cheaply. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the all-columns optimization is only valid if it holds on every rank, hence the logical AND */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the sequential piece cached by a previous MAT_INITIAL_MATRIX call and refill it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns evenly, the first (n % size) ranks getting one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to obtain this rank's [rstart, rend) column ownership */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens)); /* frees olens too (one allocation) */
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* copy the sequential piece row by row into the parallel matrix */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse)); /* composing took a reference; drop ours */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3887 
3888 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3889 {
3890   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3891   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3892   const PetscInt *JJ;
3893   PetscBool       nooffprocentries;
3894   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3895 
3896   PetscFunctionBegin;
3897   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3898 
3899   PetscCall(PetscLayoutSetUp(B->rmap));
3900   PetscCall(PetscLayoutSetUp(B->cmap));
3901   m      = B->rmap->n;
3902   cstart = B->cmap->rstart;
3903   cend   = B->cmap->rend;
3904   rstart = B->rmap->rstart;
3905 
3906   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3907 
3908   if (PetscDefined(USE_DEBUG)) {
3909     for (i = 0; i < m; i++) {
3910       nnz = Ii[i + 1] - Ii[i];
3911       JJ  = J + Ii[i];
3912       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3913       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3914       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3915     }
3916   }
3917 
3918   for (i = 0; i < m; i++) {
3919     nnz     = Ii[i + 1] - Ii[i];
3920     JJ      = J + Ii[i];
3921     nnz_max = PetscMax(nnz_max, nnz);
3922     d       = 0;
3923     for (j = 0; j < nnz; j++) {
3924       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3925     }
3926     d_nnz[i] = d;
3927     o_nnz[i] = nnz - d;
3928   }
3929   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3930   PetscCall(PetscFree2(d_nnz, o_nnz));
3931 
3932   for (i = 0; i < m; i++) {
3933     ii = i + rstart;
3934     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3935   }
3936   nooffprocentries    = B->nooffprocentries;
3937   B->nooffprocentries = PETSC_TRUE;
3938   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3939   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3940   B->nooffprocentries = nooffprocentries;
3941 
3942   /* count number of entries below block diagonal */
3943   PetscCall(PetscFree(Aij->ld));
3944   PetscCall(PetscCalloc1(m, &ld));
3945   Aij->ld = ld;
3946   for (i = 0; i < m; i++) {
3947     nnz = Ii[i + 1] - Ii[i];
3948     j   = 0;
3949     while (j < nnz && J[j] < cstart) j++;
3950     ld[i] = j;
3951     J += nnz;
3952   }
3953 
3954   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3955   PetscFunctionReturn(PETSC_SUCCESS);
3956 }
3957 
3958 /*@
3959    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3960    (the default parallel PETSc format).
3961 
3962    Collective
3963 
3964    Input Parameters:
3965 +  B - the matrix
3966 .  i - the indices into j for the start of each local row (starts with zero)
3967 .  j - the column indices for each local row (starts with zero)
3968 -  v - optional values in the matrix
3969 
3970    Level: developer
3971 
3972    Notes:
3973        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3974      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3975      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3976 
3977        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3978 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix the input data expected is
    as shown
3982 
3983 $        1 0 0
3984 $        2 0 3     P0
3985 $       -------
3986 $        4 5 6     P1
3987 $
3988 $     Process0 [P0]: rows_owned=[0,1]
3989 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3990 $        j =  {0,0,2}  [size = 3]
3991 $        v =  {1,2,3}  [size = 3]
3992 $
3993 $     Process1 [P1]: rows_owned=[2]
3994 $        i =  {0,3}    [size = nrow+1  = 1+1]
3995 $        j =  {0,1,2}  [size = 3]
3996 $        v =  {4,5,6}  [size = 3]
3997 
3998 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3999           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
4000 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed on B (e.g.
     MatMPIAIJSetPreallocationCSR_MPIAIJ); PetscTryMethod makes this a no-op
     for matrix types that do not provide "MatMPIAIJSetPreallocationCSR_C". */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4007 
4008 /*@C
4009    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4010    (the default parallel PETSc format).  For good matrix assembly performance
4011    the user should preallocate the matrix storage by setting the parameters
4012    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4013    performance can be increased by more than a factor of 50.
4014 
4015    Collective
4016 
4017    Input Parameters:
4018 +  B - the matrix
4019 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4020            (same value is used for all local rows)
4021 .  d_nnz - array containing the number of nonzeros in the various rows of the
4022            DIAGONAL portion of the local submatrix (possibly different for each row)
4023            or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
4024            The size of this array is equal to the number of local rows, i.e 'm'.
4025            For matrices that will be factored, you must leave room for (and set)
4026            the diagonal entry even if it is zero.
4027 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4028            submatrix (same value is used for all local rows).
4029 -  o_nnz - array containing the number of nonzeros in the various rows of the
4030            OFF-DIAGONAL portion of the local submatrix (possibly different for
4031            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
4032            structure. The size of this array is equal to the number
4033            of local rows, i.e 'm'.
4034 
4035    If the *_nnz parameter is given then the *_nz parameter is ignored
4036 
   The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
4038    storage.  The stored row and column indices begin with zero.
4039    See [Sparse Matrices](sec_matsparse) for details.
4040 
4041    The parallel matrix is partitioned such that the first m0 rows belong to
4042    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4043    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4044 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4054 
4055    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4056 
4057    You can call MatGetInfo() to get information on how effective the preallocation was;
4058    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4059    You can also run with the option -info and look for messages with the string
4060    malloc in them to see if additional memory allocation was needed.
4061 
4062    Example usage:
4063 
4064    Consider the following 8x8 matrix with 34 non-zero values, that is
4065    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4066    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4067    as follows:
4068 
4069 .vb
4070             1  2  0  |  0  3  0  |  0  4
4071     Proc0   0  5  6  |  7  0  0  |  8  0
4072             9  0 10  | 11  0  0  | 12  0
4073     -------------------------------------
4074            13  0 14  | 15 16 17  |  0  0
4075     Proc1   0 18  0  | 19 20 21  |  0  0
4076             0  0  0  | 22 23  0  | 24  0
4077     -------------------------------------
4078     Proc2  25 26 27  |  0  0 28  | 29  0
4079            30  0  0  | 31 32 33  |  0 34
4080 .ve
4081 
4082    This can be represented as a collection of submatrices as:
4083 
4084 .vb
4085       A B C
4086       D E F
4087       G H I
4088 .ve
4089 
4090    Where the submatrices A,B,C are owned by proc0, D,E,F are
4091    owned by proc1, G,H,I are owned by proc2.
4092 
4093    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4094    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4095    The 'M','N' parameters are 8,8, and have the same values on all procs.
4096 
4097    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4098    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4099    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
   matrix, and [DF] as another `MATSEQAIJ` matrix.
4103 
4104    When d_nz, o_nz parameters are specified, d_nz storage elements are
4105    allocated for every row of the local diagonal submatrix, and o_nz
4106    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4109    In this case, the values of d_nz,o_nz are:
4110 .vb
4111      proc0 : dnz = 2, o_nz = 2
4112      proc1 : dnz = 3, o_nz = 2
4113      proc2 : dnz = 1, o_nz = 4
4114 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
4119 
4120    When d_nnz, o_nnz parameters are specified, the storage is specified
4121    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4122    In the above case the values for d_nnz,o_nnz are:
4123 .vb
4124      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4125      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4126      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4127 .ve
4128    Here the space allocated is sum of all the above values i.e 34, and
4129    hence pre-allocation is perfect.
4130 
4131    Level: intermediate
4132 
4133 .seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4134           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4135 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* Dispatch to the type-specific implementation composed on B; PetscTryMethod
     makes this a no-op for matrix types without "MatMPIAIJSetPreallocation_C". */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4144 
4145 /*@
4146      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4147          CSR format for the local rows.
4148 
4149    Collective
4150 
4151    Input Parameters:
4152 +  comm - MPI communicator
4153 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4154 .  n - This value should be the same as the local size used in creating the
4155        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4156        calculated if N is given) For square matrices n is almost always m.
4157 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4158 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4159 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4160 .   j - column indices
4161 -   a - optional matrix values
4162 
4163    Output Parameter:
4164 .   mat - the matrix
4165 
4166    Level: intermediate
4167 
4168    Notes:
4169        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4170      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4171      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4172 
4173        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4174 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix the input data expected is
    as shown
4178 
4179        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4180 
4181 $        1 0 0
4182 $        2 0 3     P0
4183 $       -------
4184 $        4 5 6     P1
4185 $
4186 $     Process0 [P0]: rows_owned=[0,1]
4187 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4188 $        j =  {0,0,2}  [size = 3]
4189 $        v =  {1,2,3}  [size = 3]
4190 $
4191 $     Process1 [P1]: rows_owned=[2]
4192 $        i =  {0,3}    [size = nrow+1  = 1+1]
4193 $        j =  {0,1,2}  [size = 3]
4194 $        v =  {4,5,6}  [size = 3]
4195 
.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4197           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4198 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i may be NULL (rank contributes no rows); otherwise the CSR row offsets must start at 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies i/j/a into the matrix, preallocates, inserts, and assembles it */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4211 
4212 /*@
4213      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4214          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`
4215 
4216      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4217 
4218    Collective
4219 
4220    Input Parameters:
4221 +  mat - the matrix
4222 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4223 .  n - This value should be the same as the local size used in creating the
4224        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4225        calculated if N is given) For square matrices n is almost always m.
4226 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4227 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4228 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4229 .  J - column indices
4230 -  v - matrix values
4231 
4232    Level: intermediate
4233 
4234 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4235           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4236 @*/
/* Replaces the numerical values of mat with v, assuming the CSR structure (Ii, J) is
   identical to the one the matrix was built with (e.g. via MatCreateMPIAIJWithArrays()).
   Deprecated in favor of MatUpdateMPIAIJWithArray(); J, M, and N are accepted for
   interface compatibility but not used. */
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;   /* row offsets of the diagonal (A) block */
  PetscInt       *ld  = Aij->ld; /* ld[i]: # entries in row i with global column < cstart,
                                    as built by MatMPIAIJSetPreallocationCSR_MPIAIJ() */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  /* Split each CSR row of v between the diagonal (A) and off-diagonal (B) blocks.
     The split treats row i of v as [off-diag below | diag block | off-diag above];
     NOTE(review): this presumes the column indices within each row were sorted
     ascending when the matrix was built -- confirm with callers. */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];   /* total entries in this row */
    Iii = Ii[i];               /* offset of this row within v */
    ldi = ld[i];               /* off-diagonal entries preceding the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries landing in the diagonal block */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* only locally owned values were changed, so skip off-process communication during assembly */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4280 
4281 /*@
4282      MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4283 
4284    Collective
4285 
4286    Input Parameters:
4287 +  mat - the matrix
4288 -  v - matrix values, stored by row
4289 
4290    Level: intermediate
4291 
4292    Note:
4293    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4294 
4295 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4297 @*/
4298 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4299 {
4300   PetscInt        nnz, i, m;
4301   PetscBool       nooffprocentries;
4302   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4303   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4304   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4305   PetscScalar    *ad, *ao;
4306   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4307   PetscInt        ldi, Iii, md;
4308   PetscInt       *ld = Aij->ld;
4309 
4310   PetscFunctionBegin;
4311   m = mat->rmap->n;
4312 
4313   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4314   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4315   Iii = 0;
4316   for (i = 0; i < m; i++) {
4317     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4318     ldi = ld[i];
4319     md  = Adi[i + 1] - Adi[i];
4320     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4321     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4322     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4323     ad += md;
4324     ao += nnz - md;
4325     Iii += nnz;
4326   }
4327   nooffprocentries      = mat->nooffprocentries;
4328   mat->nooffprocentries = PETSC_TRUE;
4329   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4330   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4331   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4332   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4333   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4334   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4335   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4336   mat->nooffprocentries = nooffprocentries;
4337   PetscFunctionReturn(PETSC_SUCCESS);
4338 }
4339 
4340 /*@C
4341    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4342    (the default parallel PETSc format).  For good matrix assembly performance
4343    the user should preallocate the matrix storage by setting the parameters
4344    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4345    performance can be increased by more than a factor of 50.
4346 
4347    Collective
4348 
4349    Input Parameters:
4350 +  comm - MPI communicator
4351 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4352            This value should be the same as the local size used in creating the
4353            y vector for the matrix-vector product y = Ax.
4354 .  n - This value should be the same as the local size used in creating the
4355        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4356        calculated if N is given) For square matrices n is almost always m.
4357 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4358 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4359 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4360            (same value is used for all local rows)
4361 .  d_nnz - array containing the number of nonzeros in the various rows of the
4362            DIAGONAL portion of the local submatrix (possibly different for each row)
4363            or NULL, if d_nz is used to specify the nonzero structure.
4364            The size of this array is equal to the number of local rows, i.e 'm'.
4365 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4366            submatrix (same value is used for all local rows).
4367 -  o_nnz - array containing the number of nonzeros in the various rows of the
4368            OFF-DIAGONAL portion of the local submatrix (possibly different for
4369            each row) or NULL, if o_nz is used to specify the nonzero
4370            structure. The size of this array is equal to the number
4371            of local rows, i.e 'm'.
4372 
4373    Output Parameter:
4374 .  A - the matrix
4375 
4376    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4377    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4378    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4379 
4380    Notes:
4381    If the *_nnz parameter is given then the *_nz parameter is ignored
4382 
4383    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4384    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4385    storage requirements for this matrix.
4386 
4387    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4389    that argument.
4390 
4391    The user MUST specify either the local or global matrix dimensions
4392    (possibly both).
4393 
4394    The parallel matrix is partitioned across processors such that the
4395    first m0 rows belong to process 0, the next m1 rows belong to
4396    process 1, the next m2 rows belong to process 2 etc.. where
4397    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4398    values corresponding to [m x N] submatrix.
4399 
4400    The columns are logically partitioned with the n0 columns belonging
4401    to 0th partition, the next n1 columns belonging to the next
4402    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4403 
4404    The DIAGONAL portion of the local submatrix on any given processor
4405    is the submatrix corresponding to the rows and columns m,n
4406    corresponding to the given processor. i.e diagonal matrix on
4407    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4408    etc. The remaining portion of the local submatrix [m x (N-n)]
4409    constitute the OFF-DIAGONAL portion. The example below better
4410    illustrates this concept.
4411 
4412    For a square global matrix we define each processor's diagonal portion
4413    to be its local rows and the corresponding columns (a square submatrix);
4414    each processor's off-diagonal portion encompasses the remainder of the
4415    local matrix (a rectangular submatrix).
4416 
4417    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4418 
4419    When calling this routine with a single process communicator, a matrix of
4420    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4421    type of communicator, use the construction mechanism
4422 .vb
4423      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4424 .ve
4425 
4426 $     MatCreate(...,&A);
4427 $     MatSetType(A,MATMPIAIJ);
4428 $     MatSetSizes(A, m,n,M,N);
4429 $     MatMPIAIJSetPreallocation(A,...);
4430 
4431    By default, this format uses inodes (identical nodes) when possible.
4432    We search for consecutive rows with the same nonzero structure, thereby
4433    reusing matrix information to achieve increased efficiency.
4434 
4435    Options Database Keys:
4436 +  -mat_no_inode  - Do not use inodes
4437 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4438 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4439         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4440         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4441 
4442    Example usage:
4443 
4444    Consider the following 8x8 matrix with 34 non-zero values, that is
4445    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4446    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4447    as follows
4448 
4449 .vb
4450             1  2  0  |  0  3  0  |  0  4
4451     Proc0   0  5  6  |  7  0  0  |  8  0
4452             9  0 10  | 11  0  0  | 12  0
4453     -------------------------------------
4454            13  0 14  | 15 16 17  |  0  0
4455     Proc1   0 18  0  | 19 20 21  |  0  0
4456             0  0  0  | 22 23  0  | 24  0
4457     -------------------------------------
4458     Proc2  25 26 27  |  0  0 28  | 29  0
4459            30  0  0  | 31 32 33  |  0 34
4460 .ve
4461 
4462    This can be represented as a collection of submatrices as
4463 
4464 .vb
4465       A B C
4466       D E F
4467       G H I
4468 .ve
4469 
4470    Where the submatrices A,B,C are owned by proc0, D,E,F are
4471    owned by proc1, G,H,I are owned by proc2.
4472 
4473    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4474    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4475    The 'M','N' parameters are 8,8, and have the same values on all procs.
4476 
4477    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4478    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4479    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4480    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4481    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4483 
4484    When d_nz, o_nz parameters are specified, d_nz storage elements are
4485    allocated for every row of the local diagonal submatrix, and o_nz
4486    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4488    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4489    In this case, the values of d_nz,o_nz are
4490 .vb
4491      proc0 : dnz = 2, o_nz = 2
4492      proc1 : dnz = 3, o_nz = 2
4493      proc2 : dnz = 1, o_nz = 4
4494 .ve
4495    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4496    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
4498    34 values.
4499 
4500    When d_nnz, o_nnz parameters are specified, the storage is specified
4501    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4502    In the above case the values for d_nnz,o_nnz are
4503 .vb
4504      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4505      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4506      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4507 .ve
4508    Here the space allocated is sum of all the above values i.e 34, and
4509    hence pre-allocation is perfect.
4510 
4511    Level: intermediate
4512 
4513 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4514           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4515 @*/
4516 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4517 {
4518   PetscMPIInt size;
4519 
4520   PetscFunctionBegin;
4521   PetscCall(MatCreate(comm, A));
4522   PetscCall(MatSetSizes(*A, m, n, M, N));
4523   PetscCallMPI(MPI_Comm_size(comm, &size));
4524   if (size > 1) {
4525     PetscCall(MatSetType(*A, MATMPIAIJ));
4526     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4527   } else {
4528     PetscCall(MatSetType(*A, MATSEQAIJ));
4529     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4530   }
4531   PetscFunctionReturn(PETSC_SUCCESS);
4532 }
4533 
4534 /*MC
4535     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4536 
4537     Synopsis:
4538     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4539 
4540     Not Collective
4541 
4542     Input Parameter:
4543 .   A - the `MATMPIAIJ` matrix
4544 
4545     Output Parameters:
4546 +   Ad - the diagonal portion of the matrix
4547 .   Ao - the off diagonal portion of the matrix
4548 .   colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4549 -   ierr - error code
4550 
4551      Level: advanced
4552 
4553     Note:
4554     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4555 
4556 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4557 M*/
4558 
4559 /*MC
4560     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4561 
4562     Synopsis:
4563     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4564 
4565     Not Collective
4566 
4567     Input Parameters:
4568 +   A - the `MATMPIAIJ` matrix
4569 .   Ad - the diagonal portion of the matrix
4570 .   Ao - the off diagonal portion of the matrix
4571 .   colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4572 -   ierr - error code
4573 
4574      Level: advanced
4575 
4576 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4577 M*/
4578 
4579 /*@C
4580   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4581 
4582   Not collective
4583 
4584   Input Parameter:
4585 . A - The `MATMPIAIJ` matrix
4586 
4587   Output Parameters:
4588 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4589 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4590 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4591 
4592   Level: intermediate
4593 
4594   Note:
4595   The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4597   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4598   local column numbers to global column numbers in the original matrix.
4599 
4600   Fortran Note:
4601   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4602 
4603 .seealso: `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4604 @*/
4605 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4606 {
4607   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4608   PetscBool   flg;
4609 
4610   PetscFunctionBegin;
4611   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4612   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4613   if (Ad) *Ad = a->A;
4614   if (Ao) *Ao = a->B;
4615   if (colmap) *colmap = a->garray;
4616   PetscFunctionReturn(PETSC_SUCCESS);
4617 }
4618 
/* Concatenate the rows of each rank's SeqAIJ matrix `inmat` into one parallel matrix
   `*outmat` over `comm` (rank i's rows follow those of ranks 0..i-1).
   n is the local column count of the result (or PETSC_DECIDE).
   scall == MAT_INITIAL_MATRIX creates *outmat; MAT_REUSE_MATRIX refills a matrix
   built by a previous MAT_INITIAL_MATRIX call.
   Collective: contains MPI reductions/scans, so every rank of comm must call it. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* Global row offset of this rank = total local rows on the preceding ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* Count diagonal/off-diagonal nonzeros of every local row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    /* NOTE(review): presumably the "root" type maps device input types to the matching
       parallel type — confirm against MatGetRootType_Private() */
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* Only one of these two preallocations takes effect, depending on the comm size */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* Each rank inserts only into its own rows, so the off-process stash can be skipped */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy the local rows of inmat into the owned rows of *outmat */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4670 
4671 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4672 {
4673   PetscMPIInt        rank;
4674   PetscInt           m, N, i, rstart, nnz;
4675   size_t             len;
4676   const PetscInt    *indx;
4677   PetscViewer        out;
4678   char              *name;
4679   Mat                B;
4680   const PetscScalar *values;
4681 
4682   PetscFunctionBegin;
4683   PetscCall(MatGetLocalSize(A, &m, NULL));
4684   PetscCall(MatGetSize(A, NULL, &N));
4685   /* Should this be the type of the diagonal block of A? */
4686   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4687   PetscCall(MatSetSizes(B, m, N, m, N));
4688   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4689   PetscCall(MatSetType(B, MATSEQAIJ));
4690   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4691   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4692   for (i = 0; i < m; i++) {
4693     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4694     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4695     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4696   }
4697   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4698   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4699 
4700   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4701   PetscCall(PetscStrlen(outfile, &len));
4702   PetscCall(PetscMalloc1(len + 6, &name));
4703   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4704   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4705   PetscCall(PetscFree(name));
4706   PetscCall(MatView(B, out));
4707   PetscCall(PetscViewerDestroy(&out));
4708   PetscCall(MatDestroy(&B));
4709   PetscFunctionReturn(PETSC_SUCCESS);
4710 }
4711 
/* Destructor for the Mat_Merge_SeqsToMPI context attached (via a PetscContainer)
   to matrices built by MatCreateMPIAIJSumSeqAIJSymbolic(); releases all buffers
   and the struct itself. `data` may be NULL, in which case this is a no-op. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are pointer arrays whose entries point into a single slab owned
     by entry 0 (the PetscPostIrecv* allocation pattern — see the matching
     PetscFree(abuf_r[0]) in MatCreateMPIAIJSumSeqAIJNumeric()), so free the slab
     first, then the pointer array */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4734 
4735 #include <../src/mat/utils/freespace.h>
4736 #include <petscbt.h>
4737 
/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): sum each rank's sequential matrix
   `seqmat` into the parallel matrix `mpimat`. Each rank's seqmat spans all global
   rows; the values of rows owned by other ranks are shipped to their owners and
   merged there. The nonzero structure and communication metadata were computed by
   MatCreateMPIAIJSumSeqAIJSymbolic() and are retrieved here from the
   "MatMergeSeqsToMPI" container attached to mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Retrieve the merge context produced by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;     /* CSR structure of the merged local rows */
  bj     = merge->bj;
  buf_ri = merge->buf_ri; /* received i-structures (row ids + offsets), from symbolic phase */
  buf_rj = merge->buf_rj; /* received column indices, from symbolic phase */

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;  /* number of values to send to each rank */

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* rows owned by [proc] are contiguous in seqmat, so their values go in one message */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i)); /* scratch for one merged row; a row has at most N entries */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  /* each received i-structure is laid out as [nrows, row ids (nrows), offsets (nrows+1)] */
  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* bj_i is sorted and contains aj as a subsequence; advance j until the columns match */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) { /* sender k contributed to this local row */
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r entries point into one slab owned by abuf_r[0] (PetscPostIrecvScalar) */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4858 
4859 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4860 {
4861   Mat                  B_mpi;
4862   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4863   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4864   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4865   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4866   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4867   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4868   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4869   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4870   MPI_Status          *status;
4871   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4872   PetscBT              lnkbt;
4873   Mat_Merge_SeqsToMPI *merge;
4874   PetscContainer       container;
4875 
4876   PetscFunctionBegin;
4877   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4878 
4879   /* make sure it is a PETSc comm */
4880   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4881   PetscCallMPI(MPI_Comm_size(comm, &size));
4882   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4883 
4884   PetscCall(PetscNew(&merge));
4885   PetscCall(PetscMalloc1(size, &status));
4886 
4887   /* determine row ownership */
4888   /*---------------------------------------------------------*/
4889   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4890   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4891   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4892   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4893   PetscCall(PetscLayoutSetUp(merge->rowmap));
4894   PetscCall(PetscMalloc1(size, &len_si));
4895   PetscCall(PetscMalloc1(size, &merge->len_s));
4896 
4897   m      = merge->rowmap->n;
4898   owners = merge->rowmap->range;
4899 
4900   /* determine the number of messages to send, their lengths */
4901   /*---------------------------------------------------------*/
4902   len_s = merge->len_s;
4903 
4904   len          = 0; /* length of buf_si[] */
4905   merge->nsend = 0;
4906   for (proc = 0; proc < size; proc++) {
4907     len_si[proc] = 0;
4908     if (proc == rank) {
4909       len_s[proc] = 0;
4910     } else {
4911       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4912       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4913     }
4914     if (len_s[proc]) {
4915       merge->nsend++;
4916       nrows = 0;
4917       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4918         if (ai[i + 1] > ai[i]) nrows++;
4919       }
4920       len_si[proc] = 2 * (nrows + 1);
4921       len += len_si[proc];
4922     }
4923   }
4924 
4925   /* determine the number and length of messages to receive for ij-structure */
4926   /*-------------------------------------------------------------------------*/
4927   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4928   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4929 
4930   /* post the Irecv of j-structure */
4931   /*-------------------------------*/
4932   PetscCall(PetscCommGetNewTag(comm, &tagj));
4933   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4934 
4935   /* post the Isend of j-structure */
4936   /*--------------------------------*/
4937   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4938 
4939   for (proc = 0, k = 0; proc < size; proc++) {
4940     if (!len_s[proc]) continue;
4941     i = owners[proc];
4942     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4943     k++;
4944   }
4945 
4946   /* receives and sends of j-structure are complete */
4947   /*------------------------------------------------*/
4948   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4949   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4950 
4951   /* send and recv i-structure */
4952   /*---------------------------*/
4953   PetscCall(PetscCommGetNewTag(comm, &tagi));
4954   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4955 
4956   PetscCall(PetscMalloc1(len + 1, &buf_s));
4957   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4958   for (proc = 0, k = 0; proc < size; proc++) {
4959     if (!len_s[proc]) continue;
4960     /* form outgoing message for i-structure:
4961          buf_si[0]:                 nrows to be sent
4962                [1:nrows]:           row index (global)
4963                [nrows+1:2*nrows+1]: i-structure index
4964     */
4965     /*-------------------------------------------*/
4966     nrows       = len_si[proc] / 2 - 1;
4967     buf_si_i    = buf_si + nrows + 1;
4968     buf_si[0]   = nrows;
4969     buf_si_i[0] = 0;
4970     nrows       = 0;
4971     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4972       anzi = ai[i + 1] - ai[i];
4973       if (anzi) {
4974         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4975         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4976         nrows++;
4977       }
4978     }
4979     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4980     k++;
4981     buf_si += len_si[proc];
4982   }
4983 
4984   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4985   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4986 
4987   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4988   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4989 
4990   PetscCall(PetscFree(len_si));
4991   PetscCall(PetscFree(len_ri));
4992   PetscCall(PetscFree(rj_waits));
4993   PetscCall(PetscFree2(si_waits, sj_waits));
4994   PetscCall(PetscFree(ri_waits));
4995   PetscCall(PetscFree(buf_s));
4996   PetscCall(PetscFree(status));
4997 
4998   /* compute a local seq matrix in each processor */
4999   /*----------------------------------------------*/
5000   /* allocate bi array and free space for accumulating nonzero column info */
5001   PetscCall(PetscMalloc1(m + 1, &bi));
5002   bi[0] = 0;
5003 
5004   /* create and initialize a linked list */
5005   nlnk = N + 1;
5006   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
5007 
5008   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5009   len = ai[owners[rank + 1]] - ai[owners[rank]];
5010   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
5011 
5012   current_space = free_space;
5013 
5014   /* determine symbolic info for each local row */
5015   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
5016 
5017   for (k = 0; k < merge->nrecv; k++) {
5018     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5019     nrows       = *buf_ri_k[k];
5020     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5021     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5022   }
5023 
5024   MatPreallocateBegin(comm, m, n, dnz, onz);
5025   len = 0;
5026   for (i = 0; i < m; i++) {
5027     bnzi = 0;
5028     /* add local non-zero cols of this proc's seqmat into lnk */
5029     arow = owners[rank] + i;
5030     anzi = ai[arow + 1] - ai[arow];
5031     aj   = a->j + ai[arow];
5032     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5033     bnzi += nlnk;
5034     /* add received col data into lnk */
5035     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5036       if (i == *nextrow[k]) {            /* i-th row */
5037         anzi = *(nextai[k] + 1) - *nextai[k];
5038         aj   = buf_rj[k] + *nextai[k];
5039         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5040         bnzi += nlnk;
5041         nextrow[k]++;
5042         nextai[k]++;
5043       }
5044     }
5045     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5046 
5047     /* if free space is not available, make more free space */
5048     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5049     /* copy data into free space, then initialize lnk */
5050     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5051     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5052 
5053     current_space->array += bnzi;
5054     current_space->local_used += bnzi;
5055     current_space->local_remaining -= bnzi;
5056 
5057     bi[i + 1] = bi[i] + bnzi;
5058   }
5059 
5060   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5061 
5062   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5063   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5064   PetscCall(PetscLLDestroy(lnk, lnkbt));
5065 
5066   /* create symbolic parallel matrix B_mpi */
5067   /*---------------------------------------*/
5068   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5069   PetscCall(MatCreate(comm, &B_mpi));
5070   if (n == PETSC_DECIDE) {
5071     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5072   } else {
5073     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5074   }
5075   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5076   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5077   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5078   MatPreallocateEnd(dnz, onz);
5079   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5080 
5081   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5082   B_mpi->assembled = PETSC_FALSE;
5083   merge->bi        = bi;
5084   merge->bj        = bj;
5085   merge->buf_ri    = buf_ri;
5086   merge->buf_rj    = buf_rj;
5087   merge->coi       = NULL;
5088   merge->coj       = NULL;
5089   merge->owners_co = NULL;
5090 
5091   PetscCall(PetscCommDestroy(&comm));
5092 
5093   /* attach the supporting struct to B_mpi for reuse */
5094   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5095   PetscCall(PetscContainerSetPointer(container, merge));
5096   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5097   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5098   PetscCall(PetscContainerDestroy(&container));
5099   *mpimat = B_mpi;
5100 
5101   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5102   PetscFunctionReturn(PETSC_SUCCESS);
5103 }
5104 
5105 /*@C
5106       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5107                  matrices from each processor
5108 
5109     Collective
5110 
5111    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
5114 .    m - number of local rows (or `PETSC_DECIDE`)
5115 .    n - number of local columns (or `PETSC_DECIDE`)
5116 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5117 
5118    Output Parameter:
5119 .    mpimat - the parallel matrix generated
5120 
5121     Level: advanced
5122 
5123    Note:
5124      The dimensions of the sequential matrix in each processor MUST be the same.
5125      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5126      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5127 @*/
5128 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5129 {
5130   PetscMPIInt size;
5131 
5132   PetscFunctionBegin;
5133   PetscCallMPI(MPI_Comm_size(comm, &size));
5134   if (size == 1) {
5135     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5136     if (scall == MAT_INITIAL_MATRIX) {
5137       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5138     } else {
5139       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5140     }
5141     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5142     PetscFunctionReturn(PETSC_SUCCESS);
5143   }
5144   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5145   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5146   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5147   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5148   PetscFunctionReturn(PETSC_SUCCESS);
5149 }
5150 
5151 /*@
5152      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5153           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5154           with `MatGetSize()`
5155 
5156     Not Collective
5157 
5158    Input Parameters:
5159 +    A - the matrix
5160 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5161 
5162    Output Parameter:
5163 .    A_loc - the local sequential matrix generated
5164 
5165     Level: developer
5166 
5167    Notes:
5168      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5169 
5170      Destroy the matrix with `MatDestroy()`
5171 
5172 .seealso: `MatMPIAIJGetLocalMat()`
5173 @*/
5174 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5175 {
5176   PetscBool mpi;
5177 
5178   PetscFunctionBegin;
5179   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5180   if (mpi) {
5181     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5182   } else {
5183     *A_loc = A;
5184     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5185   }
5186   PetscFunctionReturn(PETSC_SUCCESS);
5187 }
5188 
5189 /*@
5190      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5191           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5192           with `MatGetSize()`
5193 
5194     Not Collective
5195 
5196    Input Parameters:
5197 +    A - the matrix
5198 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5199 
5200    Output Parameter:
5201 .    A_loc - the local sequential matrix generated
5202 
5203     Level: developer
5204 
5205    Notes:
5206      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5207 
5208      When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
5209      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
5210      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5211      modify the values of the returned A_loc.
5212 
5213 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5214 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* Accept any type whose name begins with "mpiaij" (covers device subtypes such as mpiaijcusparse) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Single rank: there is no off-diagonal part, so the diagonal block IS the local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  /* a: diagonal block (local column indices), b: off-diagonal block (compressed column indices into cmap) */
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are advanced as running cursors below; keep aav/bav intact for the Restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row counts of the merged matrix: diag nnz + offdiag nnz per row */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* Merge each row in ascending GLOBAL column order: since cmap (garray) is sorted,
       off-diagonal columns < cstart come first, then the diagonal block, then the rest */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure is unchanged: only replay the value merge in the same traversal order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5319 
5320 /*@
5321      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5322           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5323 
5324     Not Collective
5325 
5326    Input Parameters:
5327 +    A - the matrix
5328 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5329 
5330    Output Parameters:
5331 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5332 -    A_loc - the local sequential matrix generated
5333 
5334     Level: developer
5335 
5336    Note:
5337      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering)
5338 
5339 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5340 @*/
5341 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5342 {
5343   Mat             Ao, Ad;
5344   const PetscInt *cmap;
5345   PetscMPIInt     size;
5346   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5347 
5348   PetscFunctionBegin;
5349   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5350   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5351   if (size == 1) {
5352     if (scall == MAT_INITIAL_MATRIX) {
5353       PetscCall(PetscObjectReference((PetscObject)Ad));
5354       *A_loc = Ad;
5355     } else if (scall == MAT_REUSE_MATRIX) {
5356       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5357     }
5358     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5359     PetscFunctionReturn(PETSC_SUCCESS);
5360   }
5361   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5362   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5363   if (f) {
5364     PetscCall((*f)(A, scall, glob, A_loc));
5365   } else {
5366     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5367     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5368     Mat_SeqAIJ        *c;
5369     PetscInt          *ai = a->i, *aj = a->j;
5370     PetscInt          *bi = b->i, *bj = b->j;
5371     PetscInt          *ci, *cj;
5372     const PetscScalar *aa, *ba;
5373     PetscScalar       *ca;
5374     PetscInt           i, j, am, dn, on;
5375 
5376     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5377     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5378     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5379     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5380     if (scall == MAT_INITIAL_MATRIX) {
5381       PetscInt k;
5382       PetscCall(PetscMalloc1(1 + am, &ci));
5383       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5384       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5385       ci[0] = 0;
5386       for (i = 0, k = 0; i < am; i++) {
5387         const PetscInt ncols_o = bi[i + 1] - bi[i];
5388         const PetscInt ncols_d = ai[i + 1] - ai[i];
5389         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5390         /* diagonal portion of A */
5391         for (j = 0; j < ncols_d; j++, k++) {
5392           cj[k] = *aj++;
5393           ca[k] = *aa++;
5394         }
5395         /* off-diagonal portion of A */
5396         for (j = 0; j < ncols_o; j++, k++) {
5397           cj[k] = dn + *bj++;
5398           ca[k] = *ba++;
5399         }
5400       }
5401       /* put together the new matrix */
5402       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5403       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5404       /* Since these are PETSc arrays, change flags to free them as necessary. */
5405       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5406       c->free_a  = PETSC_TRUE;
5407       c->free_ij = PETSC_TRUE;
5408       c->nonew   = 0;
5409       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5410     } else if (scall == MAT_REUSE_MATRIX) {
5411       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5412       for (i = 0; i < am; i++) {
5413         const PetscInt ncols_d = ai[i + 1] - ai[i];
5414         const PetscInt ncols_o = bi[i + 1] - bi[i];
5415         /* diagonal portion of A */
5416         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5417         /* off-diagonal portion of A */
5418         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5419       }
5420       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5421     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5422     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5423     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5424     if (glob) {
5425       PetscInt cst, *gidx;
5426 
5427       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5428       PetscCall(PetscMalloc1(dn + on, &gidx));
5429       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5430       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5431       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5432     }
5433   }
5434   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5435   PetscFunctionReturn(PETSC_SUCCESS);
5436 }
5437 
5438 /*@C
5439      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5440 
5441     Not Collective
5442 
5443    Input Parameters:
5444 +    A - the matrix
5445 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5446 -    row, col - index sets of rows and columns to extract (or NULL)
5447 
5448    Output Parameter:
5449 .    A_loc - the local sequential matrix generated
5450 
5451     Level: developer
5452 
5453 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5454 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  /* Default row set: all locally owned rows */
  if (!row) {
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  /* Default column set: the columns with local nonzeros, in ascending global order.
     garray is sorted, so off-diagonal columns < cstart precede the owned block,
     and the remaining garray entries follow it. */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  /* On reuse, MatCreateSubMatrices expects an existing matrix array to fill in */
  if (scall != MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    /* PetscObjectCompose takes its own reference to iscola, so the ISDestroy below is safe */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5507 
5508 /*
 * Create a sequential AIJ matrix based on row indices. A whole column is extracted once a row is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5511  * on a global size.
5512  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero colunms for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per owned row, record (diag nnz, offdiag nnz) pairs and their running offsets;
     these are broadcast root->leaf as MPIU_2INT pairs below */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we have the relative location of each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  /* Accumulate nnz per requested row (pnnz) and the total leaf counts for diag/offdiag */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* Build nonzero-level SF graphs: each nonzero of P_oth is a leaf rooted at the
     corresponding nonzero of P's diag (sf) or offdiag (osf) CSR arrays */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix */
  /* NOTE: pd->j is translated IN PLACE and restored after the broadcast completes below */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is likewise translated in place to global indices and mapped back afterwards */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5681 
/*
 * Creates a SeqAIJ matrix by taking rows of P that equal to nonzero columns of local A.
 * This supports MPIAIJ and MAIJ.
 *
 * dof collapses groups of consecutive global columns: off-diagonal column a->garray[i] of A
 * is mapped to key a->garray[i]/dof, so dof consecutive columns select the same row of P
 * (presumably the MAIJ case where each node carries dof fields — confirm with callers).
 *
 * MAT_INITIAL_MATRIX: builds the set of needed P rows and extracts the submatrix;
 * MAT_REUSE_MATRIX:   refreshes the numerical values of *P_oth in place via the
 *                     "diagsf"/"offdiagsf" PetscSF objects queried from *P_oth
 *                     (presumably composed by MatCreateSeqSubMatrixWithRows_Private).
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp; /* maps key = garray[i]/dof -> its order of first appearance */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense:
       equal keys must be adjacent for mapping[i] = count - 1 to be the value stored
       for the key seen on the previous iteration */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step */
        mapping[i] = count - 1;
      }
    }
    /* map: for each off-diag column of A, the local row of *P_oth holding its data */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    /* Gather the unique keys, i.e. the global rows of P we need to fetch */
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices)); /* hash map yields keys in arbitrary order */
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that as attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place: both broadcasts write disjoint slots of p_oth->a
       (diag and off-diag contributions), so they can be pipelined */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5758 
/*@C
  MatGetBrowsOfAcols - Returns a sequential submatrix of B containing the rows of B that
  correspond to nonzero columns of local A

  Collective

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or NULL), modified on output
. colb - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Note:
  For `MAT_REUSE_MATRIX`, both rowb and colb must be provided (and B_seq must hold the
  previously generated matrix); for `MAT_INITIAL_MATRIX` they may be NULL, in which case the
  internally built index sets are destroyed before returning.

  Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL; /* array form required by MatCreateSubMatrices() */

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the sorted list of B-rows we need: off-diag columns below the local range,
       then all local columns (rows), then off-diag columns above the local range.
       Relies on a->garray being sorted so that one split point (imark) suffices. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); /* all columns of B */
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() expects a caller-allocated Mat array when reusing */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  /* With MAT_INITIAL_MATRIX, MatCreateSubMatrices() allocates bseq itself */
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq)); /* frees only the array, not the matrix it holds */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5829 
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Notes:
    The exchange proceeds in three phases over the same request array: (1) row lengths
    (i-array), (2) column indices (j-array) — both only for MAT_INITIAL_MATRIX — and
    (3) numerical values (a-array). Message patterns are taken from the Mvctx VecScatter
    of A, so the processes that send/receive are exactly the neighbors of A's off-diagonal.

    Developer Note:
    This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *b_oth;
  VecScatter         ctx;
  MPI_Comm           comm;
  const PetscMPIInt *rprocs, *sprocs;
  const PetscInt    *srow, *rstarts, *sstarts;
  PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
  PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
  PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
  MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
  PetscMPIInt        size, tag, rank, nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  if (size == 1) {
    /* Uniprocessor: no off-diagonal part, so there is nothing to fetch */
    /* NOTE(review): the next two lines assign the local parameter variables, not the
       caller's pointers (*startsj_s etc.) — they have no effect outside this function.
       Confirm whether *startsj_s/*startsj_r/*bufa_ptr were meant to be nulled. */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
  PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
  PetscCall(PetscMalloc1(nreqs, &reqs));
  /* One shared request array; receives come first, sends after */
  rwaits = reqs;
  swaits = reqs + nrecvs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; /* cannot reuse without saved state */
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /*  post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i = 0; i < nrecvs; i++) {
      rowlen = rvalues + rstarts[i] * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message: for each row we must send, its row length in B */
    PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
    }
    for (i = 0; i < nsends; i++) {
      rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
      nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      for (j = 0; j < nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l = 0; l < sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */

          rowlen[j * sbs + l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));

      sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len + 1, &bufj));
    PetscCall(PetscMalloc1(len + 1, &bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn + 2, &b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i = 0; i < nrecvs; i++) {
      rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
      for (j = 0; j < nrows; j++) {
        b_othi[k + 1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j], len, &len)); /* len += rowlen[j], with overflow check */
        k++;
      }
      rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));

    /* j-array */
    /*---------*/
    /*  post receives of j-array */
    for (i = 0; i < nrecvs; i++) {
      nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i = 0; i < nsends; i++) {
      nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      bufJ  = bufj + sstartsj[i];
      for (j = 0; j < nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll = 0; ll < sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
          for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
          PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Reuse the saved offsets and send buffer; only the values will be exchanged below */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); /* NOTE(review): message looks copy-pasted; scall here is an invalid MatReuse value */

  /* a-array */
  /*---------*/
  /*  post receives of a-array */
  for (i = 0; i < nrecvs; i++) {
    nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i = 0; i < nsends; i++) {
    nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
    bufA  = bufa + sstartsj[i];
    for (j = 0; j < nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll = 0; ll < sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
        for (l = 0; l < ncols; l++) *bufA++ = vals[l];
        PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
      }
    }
    PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      /* caller did not ask to save the reuse state, so drop it */
      PetscCall(PetscFree2(sstartsj, rstartsj));
      PetscCall(PetscFree(bufa_ptr));
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6056 
6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6060 #if defined(PETSC_HAVE_MKL_SPARSE)
6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6062 #endif
6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6065 #if defined(PETSC_HAVE_ELEMENTAL)
6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6067 #endif
6068 #if defined(PETSC_HAVE_SCALAPACK)
6069 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6070 #endif
6071 #if defined(PETSC_HAVE_HYPRE)
6072 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6073 #endif
6074 #if defined(PETSC_HAVE_CUDA)
6075 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6076 #endif
6077 #if defined(PETSC_HAVE_HIP)
6078 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6079 #endif
6080 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6081 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6082 #endif
6083 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6084 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6085 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6086 
6087 /*
6088     Computes (B'*A')' since computing B*A directly is untenable
6089 
6090                n                       p                          p
6091         [             ]       [             ]         [                 ]
6092       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6093         [             ]       [             ]         [                 ]
6094 
6095 */
6096 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6097 {
6098   Mat At, Bt, Ct;
6099 
6100   PetscFunctionBegin;
6101   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6102   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6103   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6104   PetscCall(MatDestroy(&At));
6105   PetscCall(MatDestroy(&Bt));
6106   PetscCall(MatTransposeSetPrecursor(Ct, C));
6107   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6108   PetscCall(MatDestroy(&Ct));
6109   PetscFunctionReturn(PETSC_SUCCESS);
6110 }
6111 
6112 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6113 {
6114   PetscBool cisdense;
6115 
6116   PetscFunctionBegin;
6117   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6118   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6119   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6120   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6121   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6122   PetscCall(MatSetUp(C));
6123 
6124   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6125   PetscFunctionReturn(PETSC_SUCCESS);
6126 }
6127 
6128 /* ----------------------------------------------------------------*/
6129 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6130 {
6131   Mat_Product *product = C->product;
6132   Mat          A = product->A, B = product->B;
6133 
6134   PetscFunctionBegin;
6135   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6136     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6137 
6138   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6139   C->ops->productsymbolic = MatProductSymbolic_AB;
6140   PetscFunctionReturn(PETSC_SUCCESS);
6141 }
6142 
6143 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6144 {
6145   Mat_Product *product = C->product;
6146 
6147   PetscFunctionBegin;
6148   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6149   PetscFunctionReturn(PETSC_SUCCESS);
6150 }
6151 
/* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

    This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge; repeats within a set are skipped via the jmap jumps */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique nonzero of Set1 */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique nonzero of Set2 */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6234 
/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block (repeats, i.e. same 'i,j' pair, are counted).
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block.
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      /* NOTE(review): upper bound uses <=, which admits j[p] == mat->cmap->N; confirm whether < was intended */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the shift applied above, restoring the true diag column index */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* Advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6374 
6375 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6376 
6377   Input Parameters:
6378     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6379     nnz:  number of unique nonzeros in the merged matrix
6380     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6381     jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set
6382 
6383   Output Parameter: (memory is allocated by the caller)
6384     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6385 
6386   Example:
6387     nnz1 = 4
6388     nnz  = 6
6389     imap = [1,3,4,5]
6390     jmap = [0,3,5,6,7]
6391    then,
6392     jmap_new = [0,0,3,3,5,6,7]
6393 */
6394 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6395 {
6396   PetscCount k, p;
6397 
6398   PetscFunctionBegin;
6399   jmap_new[0] = 0;
6400   p           = nnz;                /* p loops over jmap_new[] backwards */
6401   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6402     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6403   }
6404   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6405   PetscFunctionReturn(PETSC_SUCCESS);
6406 }
6407 
6408 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6409 {
6410   MPI_Comm    comm;
6411   PetscMPIInt rank, size;
6412   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6413   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6414   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6415 
6416   PetscFunctionBegin;
6417   PetscCall(PetscFree(mpiaij->garray));
6418   PetscCall(VecDestroy(&mpiaij->lvec));
6419 #if defined(PETSC_USE_CTABLE)
6420   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6421 #else
6422   PetscCall(PetscFree(mpiaij->colmap));
6423 #endif
6424   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6425   mat->assembled     = PETSC_FALSE;
6426   mat->was_assembled = PETSC_FALSE;
6427   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6428 
6429   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6430   PetscCallMPI(MPI_Comm_size(comm, &size));
6431   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6432   PetscCall(PetscLayoutSetUp(mat->rmap));
6433   PetscCall(PetscLayoutSetUp(mat->cmap));
6434   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6435   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6436   PetscCall(MatGetLocalSize(mat, &m, &n));
6437   PetscCall(MatGetSize(mat, &M, &N));
6438 
6439   /* ---------------------------------------------------------------------------*/
6440   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6441   /* entries come first, then local rows, then remote rows.                     */
6442   /* ---------------------------------------------------------------------------*/
6443   PetscCount n1 = coo_n, *perm1;
6444   PetscInt  *i1 = coo_i, *j1 = coo_j;
6445 
6446   PetscCall(PetscMalloc1(n1, &perm1));
6447   for (k = 0; k < n1; k++) perm1[k] = k;
6448 
6449   /* Manipulate indices so that entries with negative row or col indices will have smallest
6450      row indices, local entries will have greater but negative row indices, and remote entries
6451      will have positive row indices.
6452   */
6453   for (k = 0; k < n1; k++) {
6454     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6455     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6456     else {
6457       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6458       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6459     }
6460   }
6461 
6462   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6463   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6464   for (k = 0; k < n1; k++) {
6465     if (i1[k] > PETSC_MIN_INT) break;
6466   }                                                                               /* Advance k to the first entry we need to take care of */
6467   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6468   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6469 
6470   /* ---------------------------------------------------------------------------*/
6471   /*           Split local rows into diag/offdiag portions                      */
6472   /* ---------------------------------------------------------------------------*/
6473   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6474   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6475   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6476 
6477   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6478   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6479   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6480 
6481   /* ---------------------------------------------------------------------------*/
6482   /*           Send remote rows to their owner                                  */
6483   /* ---------------------------------------------------------------------------*/
6484   /* Find which rows should be sent to which remote ranks*/
6485   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6486   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6487   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6488   const PetscInt *ranges;
6489   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6490 
6491   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6492   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6493   for (k = rem; k < n1;) {
6494     PetscMPIInt owner;
6495     PetscInt    firstRow, lastRow;
6496 
6497     /* Locate a row range */
6498     firstRow = i1[k]; /* first row of this owner */
6499     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6500     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6501 
6502     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6503     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6504 
6505     /* All entries in [k,p) belong to this remote owner */
6506     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6507       PetscMPIInt *sendto2;
6508       PetscInt    *nentries2;
6509       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6510 
6511       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6512       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6513       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6514       PetscCall(PetscFree2(sendto, nentries2));
6515       sendto   = sendto2;
6516       nentries = nentries2;
6517       maxNsend = maxNsend2;
6518     }
6519     sendto[nsend]   = owner;
6520     nentries[nsend] = p - k;
6521     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6522     nsend++;
6523     k = p;
6524   }
6525 
6526   /* Build 1st SF to know offsets on remote to send data */
6527   PetscSF      sf1;
6528   PetscInt     nroots = 1, nroots2 = 0;
6529   PetscInt     nleaves = nsend, nleaves2 = 0;
6530   PetscInt    *offsets;
6531   PetscSFNode *iremote;
6532 
6533   PetscCall(PetscSFCreate(comm, &sf1));
6534   PetscCall(PetscMalloc1(nsend, &iremote));
6535   PetscCall(PetscMalloc1(nsend, &offsets));
6536   for (k = 0; k < nsend; k++) {
6537     iremote[k].rank  = sendto[k];
6538     iremote[k].index = 0;
6539     nleaves2 += nentries[k];
6540     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6541   }
6542   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6543   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6544   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6545   PetscCall(PetscSFDestroy(&sf1));
6546   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6547 
6548   /* Build 2nd SF to send remote COOs to their owner */
6549   PetscSF sf2;
6550   nroots  = nroots2;
6551   nleaves = nleaves2;
6552   PetscCall(PetscSFCreate(comm, &sf2));
6553   PetscCall(PetscSFSetFromOptions(sf2));
6554   PetscCall(PetscMalloc1(nleaves, &iremote));
6555   p = 0;
6556   for (k = 0; k < nsend; k++) {
6557     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6558     for (q = 0; q < nentries[k]; q++, p++) {
6559       iremote[p].rank  = sendto[k];
6560       iremote[p].index = offsets[k] + q;
6561     }
6562   }
6563   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6564 
6565   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6566   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6567 
6568   /* Send the remote COOs to their owner */
6569   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6570   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6571   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6572   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6573   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6574   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6575   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6576 
6577   PetscCall(PetscFree(offsets));
6578   PetscCall(PetscFree2(sendto, nentries));
6579 
6580   /* ---------------------------------------------------------------*/
6581   /* Sort received COOs by row along with the permutation array     */
6582   /* ---------------------------------------------------------------*/
6583   for (k = 0; k < n2; k++) perm2[k] = k;
6584   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6585 
6586   /* ---------------------------------------------------------------*/
6587   /* Split received COOs into diag/offdiag portions                 */
6588   /* ---------------------------------------------------------------*/
6589   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6590   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6591   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6592 
6593   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6594   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6595 
6596   /* --------------------------------------------------------------------------*/
6597   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6598   /* --------------------------------------------------------------------------*/
6599   PetscInt *Ai, *Bi;
6600   PetscInt *Aj, *Bj;
6601 
6602   PetscCall(PetscMalloc1(m + 1, &Ai));
6603   PetscCall(PetscMalloc1(m + 1, &Bi));
6604   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6605   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6606 
6607   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6608   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6609   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6610   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6611   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6612 
6613   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6614   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6615 
6616   /* --------------------------------------------------------------------------*/
6617   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6618   /* expect nonzeros in A/B most likely have local contributing entries        */
6619   /* --------------------------------------------------------------------------*/
6620   PetscInt    Annz = Ai[m];
6621   PetscInt    Bnnz = Bi[m];
6622   PetscCount *Ajmap1_new, *Bjmap1_new;
6623 
6624   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6625   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6626 
6627   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6628   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6629 
6630   PetscCall(PetscFree(Aimap1));
6631   PetscCall(PetscFree(Ajmap1));
6632   PetscCall(PetscFree(Bimap1));
6633   PetscCall(PetscFree(Bjmap1));
6634   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6635   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6636   PetscCall(PetscFree(perm1));
6637   PetscCall(PetscFree3(i2, j2, perm2));
6638 
6639   Ajmap1 = Ajmap1_new;
6640   Bjmap1 = Bjmap1_new;
6641 
6642   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6643   if (Annz < Annz1 + Annz2) {
6644     PetscInt *Aj_new;
6645     PetscCall(PetscMalloc1(Annz, &Aj_new));
6646     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6647     PetscCall(PetscFree(Aj));
6648     Aj = Aj_new;
6649   }
6650 
6651   if (Bnnz < Bnnz1 + Bnnz2) {
6652     PetscInt *Bj_new;
6653     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6654     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6655     PetscCall(PetscFree(Bj));
6656     Bj = Bj_new;
6657   }
6658 
6659   /* --------------------------------------------------------------------------------*/
6660   /* Create new submatrices for on-process and off-process coupling                  */
6661   /* --------------------------------------------------------------------------------*/
6662   PetscScalar *Aa, *Ba;
6663   MatType      rtype;
6664   Mat_SeqAIJ  *a, *b;
6665   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6666   PetscCall(PetscCalloc1(Bnnz, &Ba));
6667   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6668   if (cstart) {
6669     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6670   }
6671   PetscCall(MatDestroy(&mpiaij->A));
6672   PetscCall(MatDestroy(&mpiaij->B));
6673   PetscCall(MatGetRootType_Private(mat, &rtype));
6674   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6675   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6676   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6677 
6678   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6679   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6680   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6681   a->free_a = b->free_a = PETSC_TRUE;
6682   a->free_ij = b->free_ij = PETSC_TRUE;
6683 
6684   /* conversion must happen AFTER multiply setup */
6685   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6686   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6687   PetscCall(VecDestroy(&mpiaij->lvec));
6688   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6689 
6690   mpiaij->coo_n   = coo_n;
6691   mpiaij->coo_sf  = sf2;
6692   mpiaij->sendlen = nleaves;
6693   mpiaij->recvlen = nroots;
6694 
6695   mpiaij->Annz = Annz;
6696   mpiaij->Bnnz = Bnnz;
6697 
6698   mpiaij->Annz2 = Annz2;
6699   mpiaij->Bnnz2 = Bnnz2;
6700 
6701   mpiaij->Atot1 = Atot1;
6702   mpiaij->Atot2 = Atot2;
6703   mpiaij->Btot1 = Btot1;
6704   mpiaij->Btot2 = Btot2;
6705 
6706   mpiaij->Ajmap1 = Ajmap1;
6707   mpiaij->Aperm1 = Aperm1;
6708 
6709   mpiaij->Bjmap1 = Bjmap1;
6710   mpiaij->Bperm1 = Bperm1;
6711 
6712   mpiaij->Aimap2 = Aimap2;
6713   mpiaij->Ajmap2 = Ajmap2;
6714   mpiaij->Aperm2 = Aperm2;
6715 
6716   mpiaij->Bimap2 = Bimap2;
6717   mpiaij->Bjmap2 = Bjmap2;
6718   mpiaij->Bperm2 = Bperm2;
6719 
6720   mpiaij->Cperm1 = Cperm1;
6721 
6722   /* Allocate in preallocation. If not used, it has zero cost on host */
6723   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6724   PetscFunctionReturn(PETSC_SUCCESS);
6725 }
6726 
/*
  MatSetValuesCOO_MPIAIJ - accumulate the COO values v[] into the diag (A) and offdiag (B) blocks,
  using the permutation/jmap arrays prepared by MatSetPreallocationCOO_MPIAIJ().

  v[] is ordered as the (i,j) pairs the user passed to MatSetPreallocationCOO(). With INSERT_VALUES
  the existing matrix values are overwritten; with other modes they are added to.
  Off-process values are shipped to their owner via mpiaij->coo_sf, overlapping the communication
  with the local accumulation.
*/
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B;
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  /* jmap[i]..jmap[i+1] delimit the repeated COO entries contributing to the i-th nonzero;
     perm[] maps them back into v[] (or recvbuf[] for remote entries); imap2[] maps the i-th
     received unique entry to its nonzero position in A/B */
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; the local pass above already initialized every nonzero,
     so these are always accumulated with += regardless of imode */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6773 
6774 /*MC
6775    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6776 
6777    Options Database Keys:
6778 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6779 
6780    Level: beginner
6781 
6782    Notes:
6783     `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
6784     in this case the values associated with the rows and columns one passes in are set to zero
6785     in the matrix
6786 
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6789 
6790 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6791 M*/
6792 
/*
  MatCreate_MPIAIJ - type constructor invoked by MatSetType(B, MATMPIAIJ)

  Allocates the Mat_MPIAIJ data, installs the MATMPIAIJ function table, creates the stash used to
  buffer off-process MatSetValues() entries, and composes the type-specific methods (preallocation,
  conversions, products, COO insertion) queried by name elsewhere in PETSc.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  /* Install the shared MATMPIAIJ operation table (defined earlier in this file) */
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map, built lazily when needed */
  b->garray      = NULL; /* global column indices of the offdiag block, built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Compose type-specific methods; a NULL-composed name later (see MatDestroy_MPIAIJ) removes them */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6875 
6876 /*@C
6877      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6878          and "off-diagonal" part of the matrix in CSR format.
6879 
6880    Collective
6881 
6882    Input Parameters:
6883 +  comm - MPI communicator
6884 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6885 .  n - This value should be the same as the local size used in creating the
6886        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6887        calculated if N is given) For square matrices n is almost always m.
6888 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
6889 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
6890 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6891 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6892 .   a - matrix values
6893 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6894 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6895 -   oa - matrix values
6896 
6897    Output Parameter:
6898 .   mat - the matrix
6899 
6900    Level: advanced
6901 
6902    Notes:
6903        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6904        must free the arrays once the matrix has been destroyed and not before.
6905 
6906        The i and j indices are 0 based
6907 
6908        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6909 
6910        This sets local rows and cannot be used to set off-processor values.
6911 
6912        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6913        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6914        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6915        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6916        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6917        communication if it is known that only local entries will be set.
6918 
6919 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6920           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6921 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Cheap sanity checks on the user-supplied CSR arrays */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* The diag/offdiag blocks are built directly below, so skip the usual preallocation path */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the caller's arrays without copying; the caller retains ownership (see the man page above) */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Only local entries exist, so suppress off-process communication for this assembly,
     then restore the default and lock the nonzero pattern */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6950 
/* Product data attached to C->product->data for backend (device-capable) MPIAIJ matrix products;
   freed by MatDestroy_MatMatMPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* [cp] intermediate products */
  PetscBool *mptmp; /* [cp] is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i]; own[0] is the base of the backing allocation */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i]; off[0] is the base of the backing allocation */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory type of the SF-allocated buffers coo_v/coo_w */

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;
6981 
/* MatDestroy_MatMatMPIAIJBACKEND - destructor for the MatMatMPIAIJBACKEND product data */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w are released via PetscSFFree with mmdata->mtype, matching the (possibly device)
     allocation done during the symbolic phase; must run before the SF itself is destroyed */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[i]/off[i] are pointers into single backing allocations whose bases are own[0]/off[0]
     (NOTE(review): assumes own/off are always allocated before destroy can be reached — confirm
     with the symbolic phase that sets C->product->destroy) */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7004 
7005 /* Copy selected n entries with indices in idx[] of A to v[].
7006    If idx is NULL, copy the whole data array of A to v[]
7007  */
7008 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7009 {
7010   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7011 
7012   PetscFunctionBegin;
7013   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7014   if (f) {
7015     PetscCall((*f)(A, n, idx, v));
7016   } else {
7017     const PetscScalar *vv;
7018 
7019     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7020     if (n && idx) {
7021       PetscScalar    *w  = v;
7022       const PetscInt *oi = idx;
7023       PetscInt        j;
7024 
7025       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7026     } else {
7027       PetscCall(PetscArraycpy(v, vv, n));
7028     }
7029     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7030   }
7031   PetscFunctionReturn(PETSC_SUCCESS);
7032 }
7033 
/* Numeric phase of the COO-based product backend: refresh the temporary matrices
   (unless the symbolic phase's data can be reused once), run the numeric phase of
   every intermediate local product, copy their values into the COO buffers laid out
   by MatProductSymbolic_MPIAIJBACKEND(), scatter off-process contributions, and
   insert everything into C. */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (locally owned values) and coo_w (offproc send buffer) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* first numeric call may reuse symbolic data; subsequent calls must refresh */

  /* recompute each intermediate local product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* harvest values from the non-temporary intermediates, splitting them into
     locally inserted values (coo_v) and values destined for other ranks (coo_w) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; /* # of entries of mp[i] going to remote ranks */

    if (mmdata->mptmp[i]) continue; /* purely temporary intermediate, consumed by a later product */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; /* # of entries of mp[i] staying on this rank */

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* all of mp[i]'s nonzeros are local: copy the whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* received values land right after the locally produced ones, matching the COO layout */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7078 
/* Support for Pt * A, A * P, or Pt * A * P */
/* Symbolic phase of the COO-based product backend: the MPI product is expressed as a
   small set (at most MAX_NUMBER_INTERMEDIATE) of sequential products of the diag/offdiag
   blocks, whose nonzeros are then mapped to global (i,j) coordinates and preallocated
   in C with MatSetPreallocationCOO(); off-process contributions are routed via a PetscSF. */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* A symmetric A allows computing A^T B as A B, avoiding the (more expensive) AtB path */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* identify the A and P operands and the local/global sizes of C for each product type */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* a single rank never scatters */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* build the chain of intermediate sequential products; for each one record how its
     local row/col indices map to global indices of C (rmapt/cmapt, rmapa/cmapa) */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      /* mp[1] = A_off * P_oth is only a temporary operand for mp[2] below */
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* route the offproc (i,j) pairs to their owners and append them after the local part */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF still carries the memory type used by PetscSFMalloc/PetscSFFree */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7575 
/* Select the COO-based backend symbolic routine for AB, AtB and PtAP products.
   On device builds the backend is used only when A and B have matching types and
   are not bound to the CPU, and the user has not requested the CPU path via the
   corresponding -*_backend_cpu option; otherwise fall back to the MPIAIJ ops. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  /* without device support the backend is always eligible */
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* option names differ depending on whether the user called the old API
       (MatMatMult etc.) or the MatProduct API */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7646 
7647 /*
7648    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7649 
7650    n - the number of block indices in cc[]
7651    cc - the block indices (must be large enough to contain the indices)
7652 */
7653 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7654 {
7655   PetscInt        cnt = -1, nidx, j;
7656   const PetscInt *idx;
7657 
7658   PetscFunctionBegin;
7659   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7660   if (nidx) {
7661     cnt     = 0;
7662     cc[cnt] = idx[0] / bs;
7663     for (j = 1; j < nidx; j++) {
7664       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7665     }
7666   }
7667   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7668   *n = cnt + 1;
7669   PetscFunctionReturn(PETSC_SUCCESS);
7670 }
7671 
7672 /*
7673     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7674 
7675     ncollapsed - the number of block indices
7676     collapsed - the block indices (must be large enough to contain the indices)
7677 */
7678 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7679 {
7680   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7681 
7682   PetscFunctionBegin;
7683   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7684   for (i = start + 1; i < start + bs; i++) {
7685     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7686     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7687     cprevtmp = cprev;
7688     cprev    = merged;
7689     merged   = cprevtmp;
7690   }
7691   *ncollapsed = nprev;
7692   if (collapsed) *collapsed = cprev;
7693   PetscFunctionReturn(PETSC_SUCCESS);
7694 }
7695 
7696 /*
7697    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7698 */
/*
   MatFilter_AIJ - create a new (Seq/MPI)AIJ matrix of the same type as Gmat that keeps
   only the entries whose absolute real part is strictly greater than vfilter.

   Input Parameters:
+  Gmat - the graph matrix ((MPI)AIJ, values assumed already made non-negative by the caller)
-  vfilter - drop threshold; entries with |Re(v)| <= vfilter are discarded

   Output Parameter:
.  filteredG - the filtered matrix (new object; caller owns it)
*/
static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
{
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
  Mat                tGmat;
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c;
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  /* the filtered matrix has the same type (and later the same sizes) as the input */
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
               Also, if the matrix is symmetric, can we skip this
               operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
  /* a = local diagonal block, b = off-diagonal block (NULL in the sequential case) */
  if (isseqaij) {
    a = Gmat;
    b = NULL;
  } else {
    Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
    a             = d->A;
    b             = d->B;
    garray        = d->garray;
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row = 0; row < nloc; row++) {
    PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
    d_nnz[row] = ncols;
    if (ncols > maxcols) maxcols = ncols;
    PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
  }
  if (b) {
    for (PetscInt row = 0; row < nloc; row++) {
      PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
      o_nnz[row] = ncols;
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
    }
  }
  PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));
  /* Second pass: copy the surviving entries. kk = 0 walks the diagonal block,
     kk = 1 the off-diagonal block (skipped when b is NULL). AA/AJ are per-row
     staging buffers sized by the widest row found above. nnz0 counts all entries
     visited, nnz1 the entries kept (used only for the PetscInfo statistics). */
  PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
  nnz0 = nnz1 = 0;
  for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
    for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
      PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
      for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) {
          nnz1++;
          PetscInt cid = idx[jj] + Istart; //diag: local column -> global column
          if (c != a) cid = garray[idx[jj]]; // off-diag: map through the column map
          AA[ncol_row] = vals[jj];
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7790 
7791 /*
7792  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7793 
7794  Input Parameter:
7795  . Amat - matrix
7796  - symmetrize - make the result symmetric
7797  + scale - scale with diagonal
7798 
7799  Output Parameter:
7800  . a_Gmat - output scalar graph >= 0
7801 
7802  */
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local graph vertices = local block rows */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* bs > 1: collapse each bs x bs block of Amat into a single scalar graph entry */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      /* Fast path: only valid when every nonzero block is fully dense; that is
         verified below and we jump to old_bs on the first violation */
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a             = d->A;
        b             = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      /* o_nnz is not needed (size 0) in the sequential case */
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols;
        for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
          nnz[brow / bs] = jj / bs;
          if (jj % bs) ok = 0;
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
            if (jj % bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
            if (nnz[brow / bs] != jj / bs) ok = 0;
            PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
          }
          if (!ok) {
            /* a non-dense block was found: release the counts and use the slow path */
            PetscCall(PetscFree2(d_nnz, o_nnz));
            goto old_bs;
          }
        }
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      /* AA/AJ are staging buffers for one collapsed row (nmax = widest block row) */
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai               = aseq->i;
        n                = ai[brow + 1] - ai[brow];
        aj               = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k;
            for (int jj = 0; jj < bs; jj++) {         // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first row of the block fixes the column pattern; garray maps local
             off-diagonal columns to global columns */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      /* Slow ("old") path: no block-density assumption; every scalar entry is
         accumulated into its block position with ADD_VALUES */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
       */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
         */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: the graph is Amat itself (or a copy when it will be modified below) */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a             = d->A;
      b             = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    /* G <- G + G^T unless Amat is already known symmetric */
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    Mat Fmat = NULL; /* some silly compiler needs this */

    PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
    PetscCall(MatDestroy(&Gmat));
    Gmat = Fmat;
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
8046 
8047 /*
8048     Special version for direct calls from Fortran
8049 */
8050 #include <petsc/private/fortranimpl.h>
8051 
/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

/* Identical to SETERRQ, except it reports the error through *_ierr and returns void */
#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)
8070 
8071 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8072   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8073 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8074   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8075 #else
8076 #endif
/*
   matsetvaluesmpiaij_ - MatSetValues() for MPIAIJ matrices, callable directly from Fortran.
   All arguments arrive by reference (Fortran convention); errors are reported through
   *_ierr via the PetscCall/SETERRQ macros redefined above rather than a return value.
   The body mirrors MatSetValues_MPIAIJ(): locally owned entries go into the diagonal (A)
   or off-diagonal (B) SeqAIJ block in place, off-process rows are stashed for assembly.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Locally owned row: prime the search-state variables the
           MatSetValues_SeqAIJ_{A,B}_Private() macros use for this row */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate global column to local B column */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-diagonal column: disassemble so B uses global column indices */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8190 
8191 /* Undefining these here since they were redefined from their original definition above! No
8192  * other PETSc functions should be defined past this point, as it is impossible to recover the
8193  * original definitions */
8194 #undef PetscCall
8195 #undef SETERRQ
8196