xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b6e6beb40ac0a73ccc19b70153df96d5058dce43)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
10 {
11   Mat B;
12 
13   PetscFunctionBegin;
14   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
15   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
16   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
17   PetscCall(MatDestroy(&B));
18   PetscFunctionReturn(PETSC_SUCCESS);
19 }
20 
21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
22 {
23   Mat B;
24 
25   PetscFunctionBegin;
26   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
27   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
28   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
29   PetscFunctionReturn(PETSC_SUCCESS);
30 }
31 
32 /*MC
33    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
34 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
36    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
37   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
38   for communicators controlling multiple processes.  It is recommended that you call both of
39   the above preallocation routines for simplicity.
40 
41    Options Database Keys:
42 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
43 
44   Developer Note:
    Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically
   switches over to use inodes when enough exist.
47 
48   Level: beginner
49 
.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
51 M*/
52 
53 /*MC
54    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
55 
56    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
57    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
58    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
59   for communicators controlling multiple processes.  It is recommended that you call both of
60   the above preallocation routines for simplicity.
61 
62    Options Database Keys:
63 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
64 
65   Level: beginner
66 
.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
68 M*/
69 
/*
  Bind (or unbind, depending on flg) the matrix and its internal work objects to the CPU.

  The flag is recorded on the parent matrix only in device-capable builds; the
  diagonal (a->A) and off-diagonal (a->B) blocks and the work vectors are
  forwarded the same flag whenever they exist (they may not have been created
  yet if the matrix is not assembled).
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}
89 
90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
91 {
92   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
93 
94   PetscFunctionBegin;
95   if (mat->A) {
96     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
97     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
98   }
99   PetscFunctionReturn(PETSC_SUCCESS);
100 }
101 
/*
  Build an index set (global numbering) of the locally owned rows that are
  kept: rows with at least one numerically nonzero stored entry. A row is
  dropped when it has no stored entries at all or when every stored entry, in
  both the diagonal and off-diagonal blocks, equals 0.0. When no process
  drops any row, *keptrows is left NULL.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data; /* diagonal block */
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data; /* off-diagonal block */
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: cnt = number of locally dropped rows */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      /* structurally empty row: dropped */
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* nonzero found: row is kept */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    /* all stored entries are numerically zero: dropped */
    cnt++;
  ok1:;
  }
  /* n0rows = global count of dropped rows; nothing to do if every row everywhere is kept */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* second pass: collect the global indices of the kept rows */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* the IS takes ownership of rows */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}
170 
171 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
172 {
173   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
174   PetscBool   cong;
175 
176   PetscFunctionBegin;
177   PetscCall(MatHasCongruentLayouts(Y, &cong));
178   if (Y->assembled && cong) {
179     PetscCall(MatDiagonalSet(aij->A, D, is));
180   } else {
181     PetscCall(MatDiagonalSet_Default(Y, D, is));
182   }
183   PetscFunctionReturn(PETSC_SUCCESS);
184 }
185 
186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
187 {
188   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
189   PetscInt    i, rstart, nrows, *rows;
190 
191   PetscFunctionBegin;
192   *zrows = NULL;
193   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
194   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
195   for (i = 0; i < nrows; i++) rows[i] += rstart;
196   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
197   PetscFunctionReturn(PETSC_SUCCESS);
198 }
199 
/*
  Compute one reduction value per global column of A into reductions[] (length
  = global number of columns, provided by the caller). Supported reductions:
  NORM_1, NORM_2, NORM_INFINITY, and sum/mean of the real or imaginary parts.
  Diagonal-block columns are shifted by cmap->rstart; off-diagonal columns are
  mapped through garray back to global numbering.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  /* work accumulates this rank's contribution for every global column */
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): these get/restore pairs have no visible effect here; presumably they
     force the host copies of the value arrays (read directly below) to be current — confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* accumulate |a|^2 per column; square root taken after the reduction */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the per-rank contributions: max for the infinity norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* means divide by the global number of rows */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
245 
/*
  Return (in *is, global numbering) the locally owned rows containing entries
  outside the block diagonal: the union of the off-block-diagonal rows of the
  diagonal block and the nonzero rows of the off-diagonal block.
*/
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  /* sis: rows of the diagonal block with entries off its block diagonal (local numbering) */
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  /* gis: rows of the off-diagonal block with any nonzero (local numbering) */
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate both index lists, then sort and remove duplicates */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  /* shift to global row numbering */
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}
276 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash table maps global column + 1 -> local column + 1 (so 0 means "absent") */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense array of global length N: colmap[gcol] = local column + 1, 0 means "absent" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
300 
/*
  MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol)

  Insert or add a single value at local (row, col) of the diagonal block A.
  Relies on caller-maintained cached state for the current row: rp1 (column
  indices), ap1 (values), nrow1/rmax1 (used/allocated row length), low1/high1
  (current search window) and lastcol1 (last column handled; columns set in
  increasing order narrow the search). orow/ocol are the global row/column,
  used only in the error message. If the column is absent and insertion is
  permitted (nonew), the row is grown via MatSeqXAIJReallocateAIJ() and later
  entries are shifted up; zero values may be skipped when ignorezeroentries
  is set (but never on the diagonal, row == col).
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow dow the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }
346 
/*
  MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol)

  Off-diagonal-block (B) counterpart of MatSetValues_SeqAIJ_A_Private(); uses
  the rp2/ap2/nrow2/rmax2/low2/high2/lastcol2 cached state. The only logic
  difference from the A variant: the ignorezeroentries skip has no
  "row != col" exemption here.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }
391 
/*
  Set an entire locally owned row from an array v[] ordered by global column:
  first the off-diagonal entries left of the diagonal block, then the diagonal
  block, then the off-diagonal entries right of it. row is a global index.
  Only replaces values of already-present nonzeros (copies into the existing
  CSR value arrays).
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  /* l = number of off-diagonal entries whose global column precedes the diagonal block
     (b->j holds local columns; garray maps them back to global) */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
429 
/*
  MatSetValues for MPIAIJ matrices.

  Locally owned rows are inserted immediately: entries with column in
  [cstart, cend) go into the diagonal block aij->A, all others into the
  off-diagonal block aij->B via the MatSetValues_SeqAIJ_{A,B}_Private()
  macros above (which consume the cached per-row state set up below). Rows
  owned by other processes are placed in the stash and communicated during
  assembly. Negative row or column indices are silently skipped.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  /* cached per-row search state consumed by the insertion macros */
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: cache its state in both blocks for the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal nonzero but insertion is disallowed: warn (nonew == 1) or error (nonew == -1) */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
538 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
544 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
545 {
546   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
547   Mat         A      = aij->A; /* diagonal part of the matrix */
548   Mat         B      = aij->B; /* offdiagonal part of the matrix */
549   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
550   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
551   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
552   PetscInt   *ailen = a->ilen, *aj = a->j;
553   PetscInt   *bilen = b->ilen, *bj = b->j;
554   PetscInt    am          = aij->A->rmap->n, j;
555   PetscInt    diag_so_far = 0, dnz;
556   PetscInt    offd_so_far = 0, onz;
557 
558   PetscFunctionBegin;
559   /* Iterate over all rows of the matrix */
560   for (j = 0; j < am; j++) {
561     dnz = onz = 0;
562     /*  Iterate over all non-zero columns of the current row */
563     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
564       /* If column is in the diagonal */
565       if (mat_j[col] >= cstart && mat_j[col] < cend) {
566         aj[diag_so_far++] = mat_j[col] - cstart;
567         dnz++;
568       } else { /* off-diagonal entries */
569         bj[offd_so_far++] = mat_j[col];
570         onz++;
571       }
572     }
573     ailen[j] = dnz;
574     bilen[j] = onz;
575   }
576   PetscFunctionReturn(PETSC_SUCCESS);
577 }
578 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
586 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
587 {
588   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
589   Mat          A    = aij->A; /* diagonal part of the matrix */
590   Mat          B    = aij->B; /* offdiagonal part of the matrix */
591   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
592   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
593   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
594   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
595   PetscInt    *ailen = a->ilen, *aj = a->j;
596   PetscInt    *bilen = b->ilen, *bj = b->j;
597   PetscInt     am          = aij->A->rmap->n, j;
598   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
599   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
600   PetscScalar *aa = a->a, *ba = b->a;
601 
602   PetscFunctionBegin;
603   /* Iterate over all rows of the matrix */
604   for (j = 0; j < am; j++) {
605     dnz_row = onz_row = 0;
606     rowstart_offd     = full_offd_i[j];
607     rowstart_diag     = full_diag_i[j];
608     /*  Iterate over all non-zero columns of the current row */
609     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
610       /* If column is in the diagonal */
611       if (mat_j[col] >= cstart && mat_j[col] < cend) {
612         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
613         aa[rowstart_diag + dnz_row] = mat_a[col];
614         dnz_row++;
615       } else { /* off-diagonal entries */
616         bj[rowstart_offd + onz_row] = mat_j[col];
617         ba[rowstart_offd + onz_row] = mat_a[col];
618         onz_row++;
619       }
620     }
621     ailen[j] = dnz_row;
622     bilen[j] = onz_row;
623   }
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 
/*
  Retrieve values at (idxm x idxn) locations into v (row-major). Rows must be
  locally owned (off-process rows raise an error); columns may be anywhere.
  Negative indices are skipped; locations not stored in the local blocks are
  returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: translate the global column to B's local numbering via the colmap */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column absent from the local off-diagonal block: value is 0.0 */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
661 
662 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
663 {
664   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
665   PetscInt    nstash, reallocs;
666 
667   PetscFunctionBegin;
668   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
669 
670   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
671   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
672   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
673   PetscFunctionReturn(PETSC_SUCCESS);
674 }
675 
/*
  Finish assembly: receive and insert stashed off-process entries, assemble
  the diagonal and off-diagonal blocks, coordinate disassembly across ranks,
  and on the first final assembly build the scatter infrastructure for
  matrix-vector products. Also updates the collective nonzerostate and the
  device offload masks.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain every incoming stash message and insert its entries */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build colmap/garray and the scatter used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* drop cached row data and the cached diagonal; they are rebuilt on demand */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
754 
755 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
756 {
757   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
758 
759   PetscFunctionBegin;
760   PetscCall(MatZeroEntries(l->A));
761   PetscCall(MatZeroEntries(l->B));
762   PetscFunctionReturn(PETSC_SUCCESS);
763 }
764 
/*
  MatZeroRows_MPIAIJ - Zeros the given global rows, optionally placing diag on
  the diagonal and fixing the right-hand side b so that the solution keeps the
  values given in x for the zeroed rows.

  rows[] may reference rows owned by any rank; MatZeroRowsMapLocal_Private
  maps them to the locally owned subset lrows (local indices, length len).
  Collective on A (ends with an MPIU_Allreduce on the nonzero-state change).
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i for every zeroed local row i */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* snapshot nonzero states so we can detect a pattern change at the end */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* square/congruent case: the diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the nonew flags; they are restored after the diagonal insertions below */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert diag at (row,row) for every zeroed row that has a matching column */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: just zero the rows in both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}
838 
/*
  MatZeroRowsColumns_MPIAIJ - Zeros the given global rows AND the matching
  columns, optionally placing diag on the diagonal and adjusting b so that
  the solution keeps the values of x in the zeroed positions.

  The rows are first communicated to their owning ranks via a PetscSF; the
  diagonal block is handled by MatZeroRowsColumns on l->A, and the
  off-diagonal block is masked explicitly using a 0/1 indicator vector
  scattered through the usual matrix-vector-product scatter.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  /* xmask is a global 0/1 indicator of zeroed rows; lmask is its ghosted image */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  /* NOTE(review): the loops below read bb and xx under 'if (b)'; those arrays
     are only initialized when BOTH x and b are non-NULL — confirm callers never
     pass b without x */
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only nrows rows have entries; ridx maps back to local rows */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is being zeroed: move its contribution to the rhs, then clear it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
956 
/*
  MatMult_MPIAIJ - Computes yy = A*xx.

  The scatter of the needed off-process entries of xx into a->lvec is started
  before the diagonal-block product and completed after it, so communication
  overlaps with local computation; the off-diagonal block then adds its
  contribution from the gathered ghost values.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  /* start gathering ghost values of xx */
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* diagonal block uses only local entries of xx: yy = A_d * xx */
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* off-diagonal block: yy += A_o * lvec */
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}
972 
973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
974 {
975   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
976 
977   PetscFunctionBegin;
978   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
979   PetscFunctionReturn(PETSC_SUCCESS);
980 }
981 
982 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
983 {
984   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
985   VecScatter  Mvctx = a->Mvctx;
986 
987   PetscFunctionBegin;
988   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
989   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
990   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
991   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
992   PetscFunctionReturn(PETSC_SUCCESS);
993 }
994 
995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
996 {
997   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
998 
999   PetscFunctionBegin;
1000   /* do nondiagonal part */
1001   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1002   /* do local part */
1003   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1004   /* add partial results together */
1005   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1006   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1007   PetscFunctionReturn(PETSC_SUCCESS);
1008 }
1009 
/*
  MatIsTranspose_MPIAIJ - Tests whether Bmat equals the transpose of Amat to
  within tol, setting *f accordingly.

  Strategy: first the cheap collective test on the diagonal blocks; only if
  that passes (and more than one rank exists) are the off-diagonal parts
  extracted as sequential submatrices and compared.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij  = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  /* all ranks must agree before proceeding to the expensive part */
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  /* notme = all global rows NOT owned by this rank: [0,first) and [last,M) */
  /* NOTE(review): the allocation is sized N - last + first but the second loop
     runs to M — safe only when M == N (square); confirm callers guarantee this */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* A(Me, Notme) must equal B(Notme, Me)^T for Bmat to be the transpose */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1051 
/* Tests symmetry of A by checking whether A is its own transpose (to within tol). */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1058 
1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1060 {
1061   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1062 
1063   PetscFunctionBegin;
1064   /* do nondiagonal part */
1065   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1066   /* do local part */
1067   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1068   /* add partial results together */
1069   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1070   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1071   PetscFunctionReturn(PETSC_SUCCESS);
1072 }
1073 
1074 /*
1075   This only works correctly for square matrices where the subblock A->A is the
1076    diagonal block
1077 */
1078 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1079 {
1080   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1081 
1082   PetscFunctionBegin;
1083   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1084   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1085   PetscCall(MatGetDiagonal(a->A, v));
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
1089 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1090 {
1091   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1092 
1093   PetscFunctionBegin;
1094   PetscCall(MatScale(a->A, aa));
1095   PetscCall(MatScale(a->B, aa));
1096   PetscFunctionReturn(PETSC_SUCCESS);
1097 }
1098 
/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  /* star forest used to communicate COO entries between ranks */
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* suffix-1 arrays: permutations/jmaps for the A (diagonal) and B (off-diagonal) blocks
     — presumably the locally supplied COO entries; see MatSetPreallocationCOO_MPIAIJ */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* suffix-2 arrays: imaps/permutations/jmaps — presumably the entries received
     from other ranks; see MatSetPreallocationCOO_MPIAIJ */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  /* communication buffers (allocated together with PetscMalloc2) and send permutation */
  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1122 
1123 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1124 {
1125   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1126 
1127   PetscFunctionBegin;
1128 #if defined(PETSC_USE_LOG)
1129   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
1130 #endif
1131   PetscCall(MatStashDestroy_Private(&mat->stash));
1132   PetscCall(VecDestroy(&aij->diag));
1133   PetscCall(MatDestroy(&aij->A));
1134   PetscCall(MatDestroy(&aij->B));
1135 #if defined(PETSC_USE_CTABLE)
1136   PetscCall(PetscHMapIDestroy(&aij->colmap));
1137 #else
1138   PetscCall(PetscFree(aij->colmap));
1139 #endif
1140   PetscCall(PetscFree(aij->garray));
1141   PetscCall(VecDestroy(&aij->lvec));
1142   PetscCall(VecScatterDestroy(&aij->Mvctx));
1143   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
1144   PetscCall(PetscFree(aij->ld));
1145 
1146   /* Free COO */
1147   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1148 
1149   PetscCall(PetscFree(mat->data));
1150 
1151   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1152   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
1153 
1154   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
1155   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
1156   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
1157   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
1158   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
1159   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
1160   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
1161   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
1162   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
1164 #if defined(PETSC_HAVE_CUDA)
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
1166 #endif
1167 #if defined(PETSC_HAVE_HIP)
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
1169 #endif
1170 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1171   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
1172 #endif
1173   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
1174 #if defined(PETSC_HAVE_ELEMENTAL)
1175   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
1176 #endif
1177 #if defined(PETSC_HAVE_SCALAPACK)
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
1179 #endif
1180 #if defined(PETSC_HAVE_HYPRE)
1181   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
1182   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
1183 #endif
1184   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1185   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
1186   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
1187   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
1188   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
1189   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
1190 #if defined(PETSC_HAVE_MKL_SPARSE)
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
1192 #endif
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1195   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
1196   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
1197   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
1198   PetscFunctionReturn(PETSC_SUCCESS);
1199 }
1200 
1201 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1202 {
1203   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1204   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1205   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1206   const PetscInt    *garray = aij->garray;
1207   const PetscScalar *aa, *ba;
1208   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1209   PetscInt          *rowlens;
1210   PetscInt          *colidxs;
1211   PetscScalar       *matvals;
1212 
1213   PetscFunctionBegin;
1214   PetscCall(PetscViewerSetUp(viewer));
1215 
1216   M  = mat->rmap->N;
1217   N  = mat->cmap->N;
1218   m  = mat->rmap->n;
1219   rs = mat->rmap->rstart;
1220   cs = mat->cmap->rstart;
1221   nz = A->nz + B->nz;
1222 
1223   /* write matrix header */
1224   header[0] = MAT_FILE_CLASSID;
1225   header[1] = M;
1226   header[2] = N;
1227   header[3] = nz;
1228   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1229   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1230 
1231   /* fill in and store row lengths  */
1232   PetscCall(PetscMalloc1(m, &rowlens));
1233   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1234   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1235   PetscCall(PetscFree(rowlens));
1236 
1237   /* fill in and store column indices */
1238   PetscCall(PetscMalloc1(nz, &colidxs));
1239   for (cnt = 0, i = 0; i < m; i++) {
1240     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1241       if (garray[B->j[jb]] > cs) break;
1242       colidxs[cnt++] = garray[B->j[jb]];
1243     }
1244     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1245     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1246   }
1247   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1248   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1249   PetscCall(PetscFree(colidxs));
1250 
1251   /* fill in and store nonzero values */
1252   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1253   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1254   PetscCall(PetscMalloc1(nz, &matvals));
1255   for (cnt = 0, i = 0; i < m; i++) {
1256     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1257       if (garray[B->j[jb]] > cs) break;
1258       matvals[cnt++] = ba[jb];
1259     }
1260     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1261     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1262   }
1263   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1264   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1265   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1266   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1267   PetscCall(PetscFree(matvals));
1268 
1269   /* write block size option to the viewer's .info file */
1270   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1271   PetscFunctionReturn(PETSC_SUCCESS);
1272 }
1273 
1274 #include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - Views a parallel AIJ matrix on ASCII,
  draw, binary, or socket viewers.

  Info-style ASCII formats are handled in place and return early; all other
  cases fall through to the final section, which gathers the whole matrix
  onto rank 0 and views it there (every rank must participate in the gather
  and in the draw synchronization).
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* gather per-rank nonzero counts and print min/avg/max */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank storage statistics, printed in rank order via synchronized printf */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch is unreachable — iascii is fully handled by the
       first branch of this if/else chain; candidate for removal after confirming */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/cols, every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1400 
1401 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1402 {
1403   PetscBool iascii, isdraw, issocket, isbinary;
1404 
1405   PetscFunctionBegin;
1406   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1407   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1408   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1409   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1410   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1411   PetscFunctionReturn(PETSC_SUCCESS);
1412 }
1413 
/*
  MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

  Only the "local" sweep variants (and Eisenstat) are supported: each outer
  iteration relaxes with the on-process diagonal block A, while coupling to
  off-process unknowns (off-diagonal block B, ghost values gathered into
  mat->lvec) is folded into the right-hand side as bb1 = bb - B*x.
  A true parallel SOR across process boundaries is not implemented and is
  reported as unsupported.

  its  - number of outer (inter-process) iterations
  lits - number of local iterations handed to the block's SOR routine
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* modified rhs, bb - B*x; allocated only when needed */
  PetscBool   hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER is purely local: delegate directly to the diagonal block */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed when more than one outer iteration runs, when the initial
     guess is nonzero (note: "~flag & SOR_ZERO_INITIAL_GUESS" tests that the
     zero-initial-guess bit is NOT set), or for the Eisenstat trick */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* With a zero initial guess the first sweep needs no ghost update: B*x = 0 */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    /* Eisenstat's trick: one backward sweep, a diagonally-scaled correction of
       the rhs, then one forward sweep; results are accumulated into xx */
    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily build and cache the diagonal, needed for the pointwise scaling below */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/factorization error detected by the local solver */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1510 
/*
  MatPermute_MPIAIJ - forms B = P_r * A * P_c for row/column permutations given as index sets.

  Strategy: invert the row and column permutations with PetscSF reductions to
  learn where each locally-owned row/column lands, broadcast the destinations of
  the ghost columns, count diagonal/off-diagonal nonzeros per destination row
  for preallocation, then insert the permuted entries with MatSetValues.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols (global columns of the off-diagonal block) should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count, for each local source row, how many of its entries land in the
     destination owner's diagonal vs off-diagonal block (dnnz/onnz), then ship
     those counts to the destination rows (tdnnz/tonnz) for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is initialized to NULL and never assigned in this
     routine, so this destroy appears to be dead code -- confirm before removing */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1616 
1617 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1618 {
1619   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1620 
1621   PetscFunctionBegin;
1622   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1623   if (ghosts) *ghosts = aij->garray;
1624   PetscFunctionReturn(PETSC_SUCCESS);
1625 }
1626 
1627 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1628 {
1629   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1630   Mat            A = mat->A, B = mat->B;
1631   PetscLogDouble isend[5], irecv[5];
1632 
1633   PetscFunctionBegin;
1634   info->block_size = 1.0;
1635   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1636 
1637   isend[0] = info->nz_used;
1638   isend[1] = info->nz_allocated;
1639   isend[2] = info->nz_unneeded;
1640   isend[3] = info->memory;
1641   isend[4] = info->mallocs;
1642 
1643   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1644 
1645   isend[0] += info->nz_used;
1646   isend[1] += info->nz_allocated;
1647   isend[2] += info->nz_unneeded;
1648   isend[3] += info->memory;
1649   isend[4] += info->mallocs;
1650   if (flag == MAT_LOCAL) {
1651     info->nz_used      = isend[0];
1652     info->nz_allocated = isend[1];
1653     info->nz_unneeded  = isend[2];
1654     info->memory       = isend[3];
1655     info->mallocs      = isend[4];
1656   } else if (flag == MAT_GLOBAL_MAX) {
1657     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1658 
1659     info->nz_used      = irecv[0];
1660     info->nz_allocated = irecv[1];
1661     info->nz_unneeded  = irecv[2];
1662     info->memory       = irecv[3];
1663     info->mallocs      = irecv[4];
1664   } else if (flag == MAT_GLOBAL_SUM) {
1665     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1666 
1667     info->nz_used      = irecv[0];
1668     info->nz_allocated = irecv[1];
1669     info->nz_unneeded  = irecv[2];
1670     info->memory       = irecv[3];
1671     info->mallocs      = irecv[4];
1672   }
1673   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1674   info->fill_ratio_needed = 0;
1675   info->factor_mallocs    = 0;
1676   PetscFunctionReturn(PETSC_SUCCESS);
1677 }
1678 
1679 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1680 {
1681   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1682 
1683   PetscFunctionBegin;
1684   switch (op) {
1685   case MAT_NEW_NONZERO_LOCATIONS:
1686   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1687   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1688   case MAT_KEEP_NONZERO_PATTERN:
1689   case MAT_NEW_NONZERO_LOCATION_ERR:
1690   case MAT_USE_INODES:
1691   case MAT_IGNORE_ZERO_ENTRIES:
1692   case MAT_FORM_EXPLICIT_TRANSPOSE:
1693     MatCheckPreallocated(A, 1);
1694     PetscCall(MatSetOption(a->A, op, flg));
1695     PetscCall(MatSetOption(a->B, op, flg));
1696     break;
1697   case MAT_ROW_ORIENTED:
1698     MatCheckPreallocated(A, 1);
1699     a->roworiented = flg;
1700 
1701     PetscCall(MatSetOption(a->A, op, flg));
1702     PetscCall(MatSetOption(a->B, op, flg));
1703     break;
1704   case MAT_FORCE_DIAGONAL_ENTRIES:
1705   case MAT_SORTED_FULL:
1706     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1707     break;
1708   case MAT_IGNORE_OFF_PROC_ENTRIES:
1709     a->donotstash = flg;
1710     break;
1711   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1712   case MAT_SPD:
1713   case MAT_SYMMETRIC:
1714   case MAT_STRUCTURALLY_SYMMETRIC:
1715   case MAT_HERMITIAN:
1716   case MAT_SYMMETRY_ETERNAL:
1717   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1718   case MAT_SPD_ETERNAL:
1719     /* if the diagonal matrix is square it inherits some of the properties above */
1720     break;
1721   case MAT_SUBMAT_SINGLEIS:
1722     A->submat_singleis = flg;
1723     break;
1724   case MAT_STRUCTURE_ONLY:
1725     /* The option is handled directly by MatSetOption() */
1726     break;
1727   default:
1728     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1729   }
1730   PetscFunctionReturn(PETSC_SUCCESS);
1731 }
1732 
/*
  MatGetRow_MPIAIJ - returns one locally-owned row of an MPIAIJ matrix with
  global column indices in increasing order.

  The row is assembled by merging the row of the diagonal block A (columns in
  [cstart, cend)) with the row of the off-diagonal block B, whose compressed
  column indices are mapped to global indices through mat->garray.  The merged
  values/indices are stored in per-matrix scratch arrays (rowvalues/rowindices)
  which are sized once for the longest local row; they remain valid until
  MatRestoreRow() is called.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time because of the shared scratch space */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* only request from A/B the pieces (values and/or columns) the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column is < cstart,
         i.e. the entries that precede the whole A row in the merged order */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying the values above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  /* A and B rows can be restored immediately: data was copied into scratch */
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1816 
1817 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1818 {
1819   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1820 
1821   PetscFunctionBegin;
1822   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1823   aij->getrowactive = PETSC_FALSE;
1824   PetscFunctionReturn(PETSC_SUCCESS);
1825 }
1826 
/*
  MatNorm_MPIAIJ - computes the Frobenius, 1-, or infinity-norm of an MPIAIJ matrix.

  Each rank walks the raw CSR arrays of its diagonal (A) and off-diagonal (B)
  blocks, accumulates a local contribution, and combines across ranks with an
  allreduce.  The 1-norm allocates two dense arrays of length cmap->N (one per
  global column), so it is memory-hungry for very wide matrices.
  The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single rank: B is empty, defer entirely to the sequential implementation */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column (A columns offset by cstart,
         B columns mapped through garray), sum across ranks, take the max */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across ranks, so a local row-sum max then a global
         MAX reduction suffices */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1904 
/*
  MatTranspose_MPIAIJ - forms the transpose of an MPIAIJ matrix.

  The diagonal block is transposed locally and written straight into the result's
  diagonal block (fast path, no communication); the off-diagonal block's entries
  are inserted one source row at a time through MatSetValues, which routes them
  to the owning ranks of the transposed rows.  Supports MAT_INITIAL_MATRIX,
  MAT_REUSE_MATRIX (same nonzero state required), and MAT_INPLACE_MATRIX
  (signalled by *matout == A, finished with MatHeaderMerge).
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  /* local sizes and raw CSR structure of the two blocks */
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* build preallocation for the transpose: column counts of A become row
       counts of B^T (d_nnz); column counts of the off-diagonal block are
       computed per compressed column (g_nnz) and summed onto the owning
       ranks' rows via an SF reduce (o_nnz) */
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* the transpose has A's column layout as rows and row layout as columns */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  /* each source row i (global row 'row') is inserted transposed: its ncol
     entries become entries in column 'row' of rows cols_tmp[0..ncol) of B */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: move B's guts into A's header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1998 
1999 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2002   Mat         a = aij->A, b = aij->B;
2003   PetscInt    s1, s2, s3;
2004 
2005   PetscFunctionBegin;
2006   PetscCall(MatGetLocalSize(mat, &s2, &s3));
2007   if (rr) {
2008     PetscCall(VecGetLocalSize(rr, &s1));
2009     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
2010     /* Overlap communication with computation. */
2011     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2012   }
2013   if (ll) {
2014     PetscCall(VecGetLocalSize(ll, &s1));
2015     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2016     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
2017   }
2018   /* scale  the diagonal block */
2019   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2020 
2021   if (rr) {
2022     /* Do a scatter end and then right scale the off-diagonal block */
2023     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2024     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2025   }
2026   PetscFunctionReturn(PETSC_SUCCESS);
2027 }
2028 
2029 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2030 {
2031   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2032 
2033   PetscFunctionBegin;
2034   PetscCall(MatSetUnfactored(a->A));
2035   PetscFunctionReturn(PETSC_SUCCESS);
2036 }
2037 
2038 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2039 {
2040   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2041   Mat         a, b, c, d;
2042   PetscBool   flg;
2043 
2044   PetscFunctionBegin;
2045   a = matA->A;
2046   b = matA->B;
2047   c = matB->A;
2048   d = matB->B;
2049 
2050   PetscCall(MatEqual(a, c, &flg));
2051   if (flg) PetscCall(MatEqual(b, d, &flg));
2052   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2053   PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055 
2056 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2057 {
2058   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2059   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2060 
2061   PetscFunctionBegin;
2062   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2063   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2064     /* because of the column compression in the off-processor part of the matrix a->B,
2065        the number of columns in a->B and b->B may be different, hence we cannot call
2066        the MatCopy() directly on the two parts. If need be, we can provide a more
2067        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2068        then copying the submatrices */
2069     PetscCall(MatCopy_Basic(A, B, str));
2070   } else {
2071     PetscCall(MatCopy(a->A, b->A, str));
2072     PetscCall(MatCopy(a->B, b->B, str));
2073   }
2074   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2075   PetscFunctionReturn(PETSC_SUCCESS);
2076 }
2077 
2078 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2079 {
2080   PetscFunctionBegin;
2081   PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
2082   PetscFunctionReturn(PETSC_SUCCESS);
2083 }
2084 
2085 /*
2086    Computes the number of nonzeros per row needed for preallocation when X and Y
2087    have different nonzero structure.
2088 */
2089 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2090 {
2091   PetscInt i, j, k, nzx, nzy;
2092 
2093   PetscFunctionBegin;
2094   /* Set the number of nonzeros in the new matrix */
2095   for (i = 0; i < m; i++) {
2096     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2097     nzx    = xi[i + 1] - xi[i];
2098     nzy    = yi[i + 1] - yi[i];
2099     nnz[i] = 0;
2100     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2101       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2102       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2103       nnz[i]++;
2104     }
2105     for (; k < nzy; k++) nnz[i]++;
2106   }
2107   PetscFunctionReturn(PETSC_SUCCESS);
2108 }
2109 
2110 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2111 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2112 {
2113   PetscInt    m = Y->rmap->N;
2114   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2115   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2116 
2117   PetscFunctionBegin;
2118   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2119   PetscFunctionReturn(PETSC_SUCCESS);
2120 }
2121 
/*
  MatAXPY_MPIAIJ - computes Y = a*X + Y.

  Fast path when the nonzero patterns coincide (block-wise AXPY with no new
  allocation); the generic fallback for SUBSET_NONZERO_PATTERN; otherwise a new
  matrix with the union pattern is preallocated, filled, and swapped into Y's
  header so callers' references to Y remain valid.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* different patterns: preallocate B with the union of X's and Y's patterns
       (diagonal blocks share the local column numbering; off-diagonal blocks
       must be compared through their garray local-to-global maps) */
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's innards with B's while keeping Y's header (and user handles) */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2152 
2153 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2154 
2155 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2156 {
2157   PetscFunctionBegin;
2158   if (PetscDefined(USE_COMPLEX)) {
2159     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2160 
2161     PetscCall(MatConjugate_SeqAIJ(aij->A));
2162     PetscCall(MatConjugate_SeqAIJ(aij->B));
2163   }
2164   PetscFunctionReturn(PETSC_SUCCESS);
2165 }
2166 
2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2168 {
2169   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2170 
2171   PetscFunctionBegin;
2172   PetscCall(MatRealPart(a->A));
2173   PetscCall(MatRealPart(a->B));
2174   PetscFunctionReturn(PETSC_SUCCESS);
2175 }
2176 
2177 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2178 {
2179   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2180 
2181   PetscFunctionBegin;
2182   PetscCall(MatImaginaryPart(a->A));
2183   PetscCall(MatImaginaryPart(a->B));
2184   PetscFunctionReturn(PETSC_SUCCESS);
2185 }
2186 
2187 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2188 {
2189   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2190   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2191   PetscScalar       *va, *vv;
2192   Vec                vB, vA;
2193   const PetscScalar *vb;
2194 
2195   PetscFunctionBegin;
2196   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2197   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2198 
2199   PetscCall(VecGetArrayWrite(vA, &va));
2200   if (idx) {
2201     for (i = 0; i < m; i++) {
2202       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2203     }
2204   }
2205 
2206   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2207   PetscCall(PetscMalloc1(m, &idxb));
2208   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2209 
2210   PetscCall(VecGetArrayWrite(v, &vv));
2211   PetscCall(VecGetArrayRead(vB, &vb));
2212   for (i = 0; i < m; i++) {
2213     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2214       vv[i] = vb[i];
2215       if (idx) idx[i] = a->garray[idxb[i]];
2216     } else {
2217       vv[i] = va[i];
2218       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2219     }
2220   }
2221   PetscCall(VecRestoreArrayWrite(vA, &vv));
2222   PetscCall(VecRestoreArrayWrite(vA, &va));
2223   PetscCall(VecRestoreArrayRead(vB, &vb));
2224   PetscCall(PetscFree(idxb));
2225   PetscCall(VecDestroy(&vA));
2226   PetscCall(VecDestroy(&vB));
2227   PetscFunctionReturn(PETSC_SUCCESS);
2228 }
2229 
/*
  MatGetRowMinAbs_MPIAIJ - For each local row, returns in v the entry of smallest
  absolute value and (optionally) its global column in idx[].  Columns with no
  stored entry participate as implicit 0.0, so any row that is not fully dense in
  the off-diagonal part has row-min-abs 0.0 at the first unstored column.
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* the whole row lives in the diagonal block; delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: every entry of its rows is an implicit zero */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j; /* advanced row by row in the second inner loop below */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW the row min-abs over B is 0.0 (an implicit zero) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap: the j-th smallest possible off-diagonal global
         column is j (for j < cstart) or j+n (for j >= cstart); if the j-th stored
         column exceeds that candidate, the candidate is the first implicit 0.0 */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          /* NOTE(review): cmap[ncols - 1] indexes garray by a count, which assumes this
             row's bj[] is 0..ncols-1; possibly should be cmap[bj[ncols - 1]] — confirm */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this row, keeping the smallest-in-absolute-value */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block results; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2345 
/*
  MatGetRowMin_MPIAIJ - For each local row, returns in v the smallest entry (by
  real part) and (optionally) its global column in idx[].  Columns with no stored
  entry participate as implicit 0.0.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* the whole row lives in the diagonal block; delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: no entries at all, so the min is the identity for min */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j; /* advanced row by row in the second inner loop below */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW the row min over B is 0.0 or lower (an implicit zero exists) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap: the j-th smallest possible off-diagonal global
         column is j (for j < cstart) or j+n (for j >= cstart); if the j-th stored
         column exceeds that candidate, the candidate is the first implicit 0.0 */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          /* NOTE(review): cmap[ncols - 1] indexes garray by a count, which assumes this
             row's bj[] is 0..ncols-1; possibly should be cmap[bj[ncols - 1]] — confirm */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this row, keeping the smallest by real part */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block results; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2461 
/*
  MatGetRowMax_MPIAIJ - For each local row, returns in v the largest entry (by
  real part) and (optionally) its global column in idx[].  Columns with no stored
  entry participate as implicit 0.0.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* the whole row lives in the diagonal block; delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: no entries at all, so the max is the identity for max */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j; /* advanced row by row in the second inner loop below */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap: the j-th smallest possible off-diagonal global
         column is j (for j < cstart) or j+n (for j >= cstart); if the j-th stored
         column exceeds that candidate, the candidate is the first implicit 0.0 */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          /* NOTE(review): cmap[ncols - 1] indexes garray by a count, which assumes this
             row's bj[] is 0..ncols-1; possibly should be cmap[bj[ncols - 1]] — confirm */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this row, keeping the largest by real part */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block results; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2577 
2578 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2579 {
2580   Mat *dummy;
2581 
2582   PetscFunctionBegin;
2583   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2584   *newmat = *dummy;
2585   PetscCall(PetscFree(dummy));
2586   PetscFunctionReturn(PETSC_SUCCESS);
2587 }
2588 
2589 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2590 {
2591   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2592 
2593   PetscFunctionBegin;
2594   PetscCall(MatInvertBlockDiagonal(a->A, values));
2595   A->factorerrortype = a->A->factorerrortype;
2596   PetscFunctionReturn(PETSC_SUCCESS);
2597 }
2598 
2599 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2600 {
2601   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2602 
2603   PetscFunctionBegin;
2604   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2605   PetscCall(MatSetRandom(aij->A, rctx));
2606   if (x->assembled) {
2607     PetscCall(MatSetRandom(aij->B, rctx));
2608   } else {
2609     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2610   }
2611   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2612   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2613   PetscFunctionReturn(PETSC_SUCCESS);
2614 }
2615 
2616 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2617 {
2618   PetscFunctionBegin;
2619   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2620   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2621   PetscFunctionReturn(PETSC_SUCCESS);
2622 }
2623 
2624 /*@
2625    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2626 
2627    Not collective
2628 
2629    Input Parameter:
2630 .    A - the matrix
2631 
2632    Output Parameter:
2633 .    nz - the number of nonzeros
2634 
2635  Level: advanced
2636 
2637 .seealso: `MATMPIAIJ`, `Mat`
2638 @*/
2639 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2640 {
2641   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2642   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2643 
2644   PetscFunctionBegin;
2645   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2646   PetscFunctionReturn(PETSC_SUCCESS);
2647 }
2648 
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective

   Input Parameters:
+    A - the matrix
-    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

 Level: advanced

.seealso: `MATMPIAIJ`, `MatIncreaseOverlap()`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* no-op for matrix types that do not provide the method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2667 
2668 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2669 {
2670   PetscBool sc = PETSC_FALSE, flg;
2671 
2672   PetscFunctionBegin;
2673   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2674   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2675   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2676   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2677   PetscOptionsHeadEnd();
2678   PetscFunctionReturn(PETSC_SUCCESS);
2679 }
2680 
/*
  MatShift_MPIAIJ - Adds a*I to Y.

  If Y was never preallocated, install a minimal one-nonzero-per-row preallocation
  so the diagonal can be inserted; if Y is preallocated but its diagonal block has
  no nonzeros yet, give just that block a one-per-row estimate before delegating
  to the generic shift.
*/
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* preallocation resets the nonew flag; save and restore it */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2697 
2698 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2699 {
2700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2701 
2702   PetscFunctionBegin;
2703   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2704   PetscCall(MatMissingDiagonal(a->A, missing, d));
2705   if (d) {
2706     PetscInt rstart;
2707     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2708     *d += rstart;
2709   }
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
2713 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2714 {
2715   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2716 
2717   PetscFunctionBegin;
2718   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2719   PetscFunctionReturn(PETSC_SUCCESS);
2720 }
2721 
2722 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
2723 {
2724   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2725 
2726   PetscFunctionBegin;
2727   PetscCall(MatEliminateZeros(a->A));
2728   PetscCall(MatEliminateZeros(a->B));
2729   PetscFunctionReturn(PETSC_SUCCESS);
2730 }
2731 
2732 /* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ.  Entries are positional (slot numbers shown in the
   interleaved comments match the _MatOps struct layout); a NULL slot falls back to
   the generic/default implementation or marks the operation unsupported.  Do not
   reorder entries — adding or removing one shifts every later slot. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};
2885 
2886 /* ----------------------------------------------------------------------------------------*/
2887 
2888 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2889 {
2890   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2891 
2892   PetscFunctionBegin;
2893   PetscCall(MatStoreValues(aij->A));
2894   PetscCall(MatStoreValues(aij->B));
2895   PetscFunctionReturn(PETSC_SUCCESS);
2896 }
2897 
2898 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2899 {
2900   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2901 
2902   PetscFunctionBegin;
2903   PetscCall(MatRetrieveValues(aij->A));
2904   PetscCall(MatRetrieveValues(aij->B));
2905   PetscFunctionReturn(PETSC_SUCCESS);
2906 }
2907 
/*
   Preallocates the diagonal (A) and off-diagonal (B) sequential blocks of an
   MPIAIJ matrix.

   Input Parameters:
     B     - the MPIAIJ matrix
     d_nz  - nonzeros per row of the diagonal block (used when d_nnz is NULL)
     d_nnz - per-row nonzero counts for the diagonal block, or NULL
     o_nz  - nonzeros per row of the off-diagonal block (used when o_nnz is NULL)
     o_nnz - per-row nonzero counts for the off-diagonal block, or NULL
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* discard the global-to-local column map plus the work vector and scatter
     used for the off-diagonal block; colmap is a hash table or a dense array
     depending on the build configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on a single process there is no off-diagonal part, so give B zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* the diagonal block is created only once; it survives repreallocation */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2949 
/*
   Resets an MPIAIJ matrix to its freshly-preallocated state: the two
   sequential blocks are reset and all assembly-time auxiliary structures for
   the off-diagonal block are discarded.
*/
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* drop the global-to-local column map (hash table or dense array depending
     on the build) and the off-diagonal work vector and scatter */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* reset both sequential blocks and mark the parallel matrix unassembled */
  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2976 
/*
   Duplicates an MPIAIJ matrix. Whether numerical values are copied is
   controlled by cpvalues; the sparsity structure and all auxiliary data
   (column map, garray, local work vector, scatter) are replicated.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  /* copy scalar state; the insert mode always starts out unset */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch state is not inherited */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share the layouts by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* replicate the global-to-local column map, if one exists; its
     representation (hash table vs dense array) depends on the build */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* copy garray (global indices of the off-diagonal block's columns);
     one extra slot is allocated so len == 0 still yields a valid pointer */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  /* duplicate the two sequential blocks and the composed-function list */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3032 
3033 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3034 {
3035   PetscBool isbinary, ishdf5;
3036 
3037   PetscFunctionBegin;
3038   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3039   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3040   /* force binary viewer to load .info file if it has not yet done so */
3041   PetscCall(PetscViewerSetUp(viewer));
3042   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3043   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3044   if (isbinary) {
3045     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3046   } else if (ishdf5) {
3047 #if defined(PETSC_HAVE_HDF5)
3048     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3049 #else
3050     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3051 #endif
3052   } else {
3053     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3054   }
3055   PetscFunctionReturn(PETSC_SUCCESS);
3056 }
3057 
/*
   Loads an MPIAIJ matrix from a PETSc binary viewer.

   Binary layout: a 4-entry header (classid, M, N, nz) followed by the per-row
   nonzero counts, the column indices, and the values. Each rank reads only
   its share of the rows (collective reads via PetscViewerBinaryReadAll()).
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* nz < 0 flags a non-AIJ on-disk representation */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  /* prefix-sum the row lengths in place to obtain local CSR row pointers */
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: local nonzero counts must add up to the header's nz */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3107 
/* Not scalable because of ISAllGather() unless getting all columns. */
/*
   Gathers a parallel column index set into a sequential one on every rank.

   If every rank's iscol is exactly its own contiguous block of mat's columns
   (determined collectively with an MPI_MIN reduction), the gather is skipped
   and an identity stride IS over all columns is returned instead.
*/
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    /* mlen is the ownership-range end, so mlen - mstart is the local width */
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all ranks must agree before taking the fast path (collective decision) */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    /* preserve the block size across the gather */
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3144 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  /* exclusive prefix sum of the local column counts: isstart is the global
     offset of this rank's first selected column in the submatrix */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    /* mark selected columns in x; entries left at -1 mean "not selected" */
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  /* idx ownership transfers to the IS (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    /* entries > -1 were marked above as selected columns */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* caller takes ownership of cmap1 and must PetscFree() it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3256 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/*
   Extracts *submat from mat when both index sets match mat's row/column
   distribution, by taking submatrices of the local diagonal (a->A) and
   off-diagonal (a->B) blocks. The index sets used are composed onto *submat
   so a later MAT_REUSE_MATRIX call can retrieve them.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    /* skip the off-diagonal update when no off-process columns are selected */
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    /* NOTE: Asub and Bsub are consumed by M; they must not be used afterwards */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* walk both sorted global-index arrays and keep only the iscol_o
         entries whose global column survived in the condensed Bsub */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3349 
/*
   Entry point for extracting a parallel submatrix of an MPIAIJ matrix.

   Chooses among three implementations, from cheapest to most expensive:
     1. isrow and iscol both match mat's distribution  -> _SameRowColDist()
     2. only isrow matches mat's row distribution      -> _SameRowDist()
     3. general case: gather iscol on every rank       -> _nonscalable()
   The distribution checks are collective (MPI_LAND reduction), so every rank
   takes the same branch. On reuse, the branch taken initially is recovered
   from the index sets composed onto *newmat.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* which composed IS is present tells us which path built *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices inside the local ownership range => same distribution */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* collective agreement: both conditions must hold on every rank */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local falls through to the general case below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    /* iscol_local may already have been built by the sameRowDist path above */
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered IS on the submatrix so the reuse path can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3449 
3450 /*@C
3451      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3452          and "off-diagonal" part of the matrix in CSR format.
3453 
3454    Collective
3455 
3456    Input Parameters:
3457 +  comm - MPI communicator
3458 .  A - "diagonal" portion of matrix
3459 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3460 -  garray - global index of B columns
3461 
   Output Parameter:
.  mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3465 
3466    Notes:
3467    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3468 
3469    A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3470 
3471 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3472 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's column indices from local (0..Bn-1) to global via garray,
     in place; Bnew below is then created over the full global column width */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer ownership of the shared i/j/a arrays from B to Bnew: clear B's
     free flags so MatDestroy(&B) leaves the arrays alive... */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  /* ...and set Bnew's flags so it frees them when it is destroyed */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  /* assembly with MAT_NO_OFF_PROC_ENTRIES avoids any parallel communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3543 
3544 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3545 
3546 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3547 {
3548   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3549   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3550   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3551   Mat             M, Msub, B = a->B;
3552   MatScalar      *aa;
3553   Mat_SeqAIJ     *aij;
3554   PetscInt       *garray = a->garray, *colsub, Ncols;
3555   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3556   IS              iscol_sub, iscmap;
3557   const PetscInt *is_idx, *cmap;
3558   PetscBool       allcolumns = PETSC_FALSE;
3559   MPI_Comm        comm;
3560 
3561   PetscFunctionBegin;
3562   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3563   if (call == MAT_REUSE_MATRIX) {
3564     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3565     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3566     PetscCall(ISGetLocalSize(iscol_sub, &count));
3567 
3568     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3569     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3570 
3571     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3572     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3573 
3574     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3575 
3576   } else { /* call == MAT_INITIAL_MATRIX) */
3577     PetscBool flg;
3578 
3579     PetscCall(ISGetLocalSize(iscol, &n));
3580     PetscCall(ISGetSize(iscol, &Ncols));
3581 
3582     /* (1) iscol -> nonscalable iscol_local */
3583     /* Check for special case: each processor gets entire matrix columns */
3584     PetscCall(ISIdentity(iscol_local, &flg));
3585     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3586     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3587     if (allcolumns) {
3588       iscol_sub = iscol_local;
3589       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3590       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3591 
3592     } else {
3593       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3594       PetscInt *idx, *cmap1, k;
3595       PetscCall(PetscMalloc1(Ncols, &idx));
3596       PetscCall(PetscMalloc1(Ncols, &cmap1));
3597       PetscCall(ISGetIndices(iscol_local, &is_idx));
3598       count = 0;
3599       k     = 0;
3600       for (i = 0; i < Ncols; i++) {
3601         j = is_idx[i];
3602         if (j >= cstart && j < cend) {
3603           /* diagonal part of mat */
3604           idx[count]     = j;
3605           cmap1[count++] = i; /* column index in submat */
3606         } else if (Bn) {
3607           /* off-diagonal part of mat */
3608           if (j == garray[k]) {
3609             idx[count]     = j;
3610             cmap1[count++] = i; /* column index in submat */
3611           } else if (j > garray[k]) {
3612             while (j > garray[k] && k < Bn - 1) k++;
3613             if (j == garray[k]) {
3614               idx[count]     = j;
3615               cmap1[count++] = i; /* column index in submat */
3616             }
3617           }
3618         }
3619       }
3620       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3621 
3622       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3623       PetscCall(ISGetBlockSize(iscol, &cbs));
3624       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3625 
3626       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3627     }
3628 
3629     /* (3) Create sequential Msub */
3630     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3631   }
3632 
3633   PetscCall(ISGetLocalSize(iscol_sub, &count));
3634   aij = (Mat_SeqAIJ *)(Msub)->data;
3635   ii  = aij->i;
3636   PetscCall(ISGetIndices(iscmap, &cmap));
3637 
3638   /*
3639       m - number of local rows
3640       Ncols - number of columns (same on all processors)
3641       rstart - first row in new global matrix generated
3642   */
3643   PetscCall(MatGetSize(Msub, &m, NULL));
3644 
3645   if (call == MAT_INITIAL_MATRIX) {
3646     /* (4) Create parallel newmat */
3647     PetscMPIInt rank, size;
3648     PetscInt    csize;
3649 
3650     PetscCallMPI(MPI_Comm_size(comm, &size));
3651     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3652 
3653     /*
3654         Determine the number of non-zeros in the diagonal and off-diagonal
3655         portions of the matrix in order to do correct preallocation
3656     */
3657 
3658     /* first get start and end of "diagonal" columns */
3659     PetscCall(ISGetLocalSize(iscol, &csize));
3660     if (csize == PETSC_DECIDE) {
3661       PetscCall(ISGetSize(isrow, &mglobal));
3662       if (mglobal == Ncols) { /* square matrix */
3663         nlocal = m;
3664       } else {
3665         nlocal = Ncols / size + ((Ncols % size) > rank);
3666       }
3667     } else {
3668       nlocal = csize;
3669     }
3670     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3671     rstart = rend - nlocal;
3672     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3673 
3674     /* next, compute all the lengths */
3675     jj = aij->j;
3676     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3677     olens = dlens + m;
3678     for (i = 0; i < m; i++) {
3679       jend = ii[i + 1] - ii[i];
3680       olen = 0;
3681       dlen = 0;
3682       for (j = 0; j < jend; j++) {
3683         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3684         else dlen++;
3685         jj++;
3686       }
3687       olens[i] = olen;
3688       dlens[i] = dlen;
3689     }
3690 
3691     PetscCall(ISGetBlockSize(isrow, &bs));
3692     PetscCall(ISGetBlockSize(iscol, &cbs));
3693 
3694     PetscCall(MatCreate(comm, &M));
3695     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3696     PetscCall(MatSetBlockSizes(M, bs, cbs));
3697     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3698     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3699     PetscCall(PetscFree(dlens));
3700 
3701   } else { /* call == MAT_REUSE_MATRIX */
3702     M = *newmat;
3703     PetscCall(MatGetLocalSize(M, &i, NULL));
3704     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3705     PetscCall(MatZeroEntries(M));
3706     /*
3707          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3708        rather than the slower MatSetValues().
3709     */
3710     M->was_assembled = PETSC_TRUE;
3711     M->assembled     = PETSC_FALSE;
3712   }
3713 
3714   /* (5) Set values of Msub to *newmat */
3715   PetscCall(PetscMalloc1(count, &colsub));
3716   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3717 
3718   jj = aij->j;
3719   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3720   for (i = 0; i < m; i++) {
3721     row = rstart + i;
3722     nz  = ii[i + 1] - ii[i];
3723     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3724     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3725     jj += nz;
3726     aa += nz;
3727   }
3728   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3729   PetscCall(ISRestoreIndices(iscmap, &cmap));
3730 
3731   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3732   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3733 
3734   PetscCall(PetscFree(colsub));
3735 
3736   /* save Msub, iscol_sub and iscmap used in processor for next request */
3737   if (call == MAT_INITIAL_MATRIX) {
3738     *newmat = M;
3739     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3740     PetscCall(MatDestroy(&Msub));
3741 
3742     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3743     PetscCall(ISDestroy(&iscol_sub));
3744 
3745     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3746     PetscCall(ISDestroy(&iscmap));
3747 
3748     if (iscol_local) {
3749       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3750       PetscCall(ISDestroy(&iscol_local));
3751     }
3752   }
3753   PetscFunctionReturn(PETSC_SUCCESS);
3754 }
3755 
3756 /*
3757     Not great since it makes two copies of the submatrix, first a SeqAIJ
3758   locally and then, by concatenating the local matrices, the end result.
3759   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3760 
3761   This requires a sequential iscol with all indices.
3762 */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* allcolumns selects a different code path in the _Local routine, so it must agree on all ranks */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of the local column counts yields this rank's column ownership [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* one allocation: dlens followed by olens */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      /* jj walks the CSR column indices of Mreuse across all rows; columns inside
         [rstart,rend) count toward the diagonal block, the rest toward off-diagonal */
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* insert each local row of the sequential Mreuse into the parallel matrix;
     cwork/vwork point into Mreuse's CSR arrays, no copies are made here */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse)); /* the composition holds a reference; drop ours */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3889 
3890 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3891 {
3892   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3893   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3894   const PetscInt *JJ;
3895   PetscBool       nooffprocentries;
3896   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3897 
3898   PetscFunctionBegin;
3899   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3900 
3901   PetscCall(PetscLayoutSetUp(B->rmap));
3902   PetscCall(PetscLayoutSetUp(B->cmap));
3903   m      = B->rmap->n;
3904   cstart = B->cmap->rstart;
3905   cend   = B->cmap->rend;
3906   rstart = B->rmap->rstart;
3907 
3908   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3909 
3910   if (PetscDefined(USE_DEBUG)) {
3911     for (i = 0; i < m; i++) {
3912       nnz = Ii[i + 1] - Ii[i];
3913       JJ  = J + Ii[i];
3914       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3915       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3916       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3917     }
3918   }
3919 
3920   for (i = 0; i < m; i++) {
3921     nnz     = Ii[i + 1] - Ii[i];
3922     JJ      = J + Ii[i];
3923     nnz_max = PetscMax(nnz_max, nnz);
3924     d       = 0;
3925     for (j = 0; j < nnz; j++) {
3926       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3927     }
3928     d_nnz[i] = d;
3929     o_nnz[i] = nnz - d;
3930   }
3931   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3932   PetscCall(PetscFree2(d_nnz, o_nnz));
3933 
3934   for (i = 0; i < m; i++) {
3935     ii = i + rstart;
3936     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3937   }
3938   nooffprocentries    = B->nooffprocentries;
3939   B->nooffprocentries = PETSC_TRUE;
3940   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3941   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3942   B->nooffprocentries = nooffprocentries;
3943 
3944   /* count number of entries below block diagonal */
3945   PetscCall(PetscFree(Aij->ld));
3946   PetscCall(PetscCalloc1(m, &ld));
3947   Aij->ld = ld;
3948   for (i = 0; i < m; i++) {
3949     nnz = Ii[i + 1] - Ii[i];
3950     j   = 0;
3951     while (j < nnz && J[j] < cstart) j++;
3952     ld[i] = j;
3953     J += nnz;
3954   }
3955 
3956   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3957   PetscFunctionReturn(PETSC_SUCCESS);
3958 }
3959 
3960 /*@
3961    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3962    (the default parallel PETSc format).
3963 
3964    Collective
3965 
3966    Input Parameters:
3967 +  B - the matrix
3968 .  i - the indices into j for the start of each local row (starts with zero)
3969 .  j - the column indices for each local row (starts with zero)
3970 -  v - optional values in the matrix
3971 
3972    Level: developer
3973 
3974    Notes:
3975        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3976      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3977      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3978 
3979        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3980 
3981        The format which is used for the sparse matrix input, is equivalent to a
3982     row-major ordering.. i.e for the following matrix, the input data expected is
3983     as shown
3984 
3985 $        1 0 0
3986 $        2 0 3     P0
3987 $       -------
3988 $        4 5 6     P1
3989 $
3990 $     Process0 [P0]: rows_owned=[0,1]
3991 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3992 $        j =  {0,0,2}  [size = 3]
3993 $        v =  {1,2,3}  [size = 3]
3994 $
3995 $     Process1 [P1]: rows_owned=[2]
3996 $        i =  {0,3}    [size = nrow+1  = 1+1]
3997 $        j =  {0,1,2}  [size = 3]
3998 $        v =  {4,5,6}  [size = 3]
3999 
4000 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
4001           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
4002 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered under "MatMPIAIJSetPreallocationCSR_C"
     (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ); silently a no-op for types without it */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4009 
4010 /*@C
4011    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4012    (the default parallel PETSc format).  For good matrix assembly performance
4013    the user should preallocate the matrix storage by setting the parameters
4014    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4015    performance can be increased by more than a factor of 50.
4016 
4017    Collective
4018 
4019    Input Parameters:
4020 +  B - the matrix
4021 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4022            (same value is used for all local rows)
4023 .  d_nnz - array containing the number of nonzeros in the various rows of the
4024            DIAGONAL portion of the local submatrix (possibly different for each row)
4025            or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
4026            The size of this array is equal to the number of local rows, i.e 'm'.
4027            For matrices that will be factored, you must leave room for (and set)
4028            the diagonal entry even if it is zero.
4029 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4030            submatrix (same value is used for all local rows).
4031 -  o_nnz - array containing the number of nonzeros in the various rows of the
4032            OFF-DIAGONAL portion of the local submatrix (possibly different for
4033            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
4034            structure. The size of this array is equal to the number
4035            of local rows, i.e 'm'.
4036 
4037    If the *_nnz parameter is given then the *_nz parameter is ignored
4038 
4039    The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
4040    storage.  The stored row and column indices begin with zero.
4041    See [Sparse Matrices](sec_matsparse) for details.
4042 
4043    The parallel matrix is partitioned such that the first m0 rows belong to
4044    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4045    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4046 
4047    The DIAGONAL portion of the local submatrix of a processor can be defined
4048    as the submatrix which is obtained by extraction the part corresponding to
4049    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4050    first row that belongs to the processor, r2 is the last row belonging to
4051    the this processor, and c1-c2 is range of indices of the local part of a
4052    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4053    common case of a square matrix, the row and column ranges are the same and
4054    the DIAGONAL part is also square. The remaining portion of the local
4055    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4056 
4057    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4058 
4059    You can call MatGetInfo() to get information on how effective the preallocation was;
4060    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4061    You can also run with the option -info and look for messages with the string
4062    malloc in them to see if additional memory allocation was needed.
4063 
4064    Example usage:
4065 
4066    Consider the following 8x8 matrix with 34 non-zero values, that is
4067    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4068    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4069    as follows:
4070 
4071 .vb
4072             1  2  0  |  0  3  0  |  0  4
4073     Proc0   0  5  6  |  7  0  0  |  8  0
4074             9  0 10  | 11  0  0  | 12  0
4075     -------------------------------------
4076            13  0 14  | 15 16 17  |  0  0
4077     Proc1   0 18  0  | 19 20 21  |  0  0
4078             0  0  0  | 22 23  0  | 24  0
4079     -------------------------------------
4080     Proc2  25 26 27  |  0  0 28  | 29  0
4081            30  0  0  | 31 32 33  |  0 34
4082 .ve
4083 
4084    This can be represented as a collection of submatrices as:
4085 
4086 .vb
4087       A B C
4088       D E F
4089       G H I
4090 .ve
4091 
4092    Where the submatrices A,B,C are owned by proc0, D,E,F are
4093    owned by proc1, G,H,I are owned by proc2.
4094 
4095    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4096    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4097    The 'M','N' parameters are 8,8, and have the same values on all procs.
4098 
4099    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4100    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4101    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4102    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4103    part as `MATSEQAIJ` matrices. for eg: proc1 will store [E] as a SeqAIJ
4104    matrix, ans [DF] as another `MATSEQAIJ` matrix.
4105 
4106    When d_nz, o_nz parameters are specified, d_nz storage elements are
4107    allocated for every row of the local diagonal submatrix, and o_nz
4108    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4109    One way to choose d_nz and o_nz is to use the max nonzerors per local
4110    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4111    In this case, the values of d_nz,o_nz are:
4112 .vb
4113      proc0 : dnz = 2, o_nz = 2
4114      proc1 : dnz = 3, o_nz = 2
4115      proc2 : dnz = 1, o_nz = 4
4116 .ve
4117    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4118    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4119    for proc2. i.e. we are using 12+15+10=37 storage locations to store
4120    34 values.
4121 
4122    When d_nnz, o_nnz parameters are specified, the storage is specified
4123    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4124    In the above case the values for d_nnz,o_nnz are:
4125 .vb
4126      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4127      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4128      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4129 .ve
4130    Here the space allocated is sum of all the above values i.e 34, and
4131    hence pre-allocation is perfect.
4132 
4133    Level: intermediate
4134 
4135 .seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4136           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4137 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the implementation registered under "MatMPIAIJSetPreallocation_C";
     a no-op for matrix types that do not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4146 
4147 /*@
4148      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4149          CSR format for the local rows.
4150 
4151    Collective
4152 
4153    Input Parameters:
4154 +  comm - MPI communicator
4155 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4156 .  n - This value should be the same as the local size used in creating the
4157        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4158        calculated if N is given) For square matrices n is almost always m.
4159 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4160 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4161 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4162 .   j - column indices
4163 -   a - optional matrix values
4164 
4165    Output Parameter:
4166 .   mat - the matrix
4167 
4168    Level: intermediate
4169 
4170    Notes:
4171        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4172      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4173      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4174 
4175        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4176 
4177        The format which is used for the sparse matrix input, is equivalent to a
4178     row-major ordering.. i.e for the following matrix, the input data expected is
4179     as shown
4180 
4181        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4182 
4183 $        1 0 0
4184 $        2 0 3     P0
4185 $       -------
4186 $        4 5 6     P1
4187 $
4188 $     Process0 [P0]: rows_owned=[0,1]
4189 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4190 $        j =  {0,0,2}  [size = 3]
4191 $        v =  {1,2,3}  [size = 3]
4192 $
4193 $     Process1 [P1]: rows_owned=[2]
4194 $        i =  {0,3}    [size = nrow+1  = 1+1]
4195 $        j =  {0,1,2}  [size = 3]
4196 $        v =  {4,5,6}  [size = 3]
4197 
4198 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4199           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4200 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i may be NULL (no local rows/entries); when given, the CSR row offsets must start at 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* preallocates exactly and copies the CSR data into the matrix (arrays are not retained) */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4213 
4214 /*@
4215      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4216          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`
4217 
4218      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4219 
4220    Collective
4221 
4222    Input Parameters:
4223 +  mat - the matrix
4224 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4225 .  n - This value should be the same as the local size used in creating the
4226        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4227        calculated if N is given) For square matrices n is almost always m.
4228 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4229 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4230 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4231 .  J - column indices
4232 -  v - matrix values
4233 
4234    Level: intermediate
4235 
4236 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4237           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4238 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* diagonal block */
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;  /* CSR row offsets of the diagonal block */
  PetscInt       *ld  = Aij->ld; /* ld[i]: entries of row i left of the diagonal block
                                    (set up by MatMPIAIJSetPreallocationCSR_MPIAIJ) */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  /* each CSR row of v is laid out as [off-diag left | diag | off-diag right];
     split it into the diagonal (ad) and off-diagonal (ao) value arrays */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i]; /* total entries of row i */
    Iii = Ii[i];
    ldi = ld[i];             /* entries left of the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries in the diagonal block */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* values only changed locally, so assembly can skip the off-process stash */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4282 
4283 /*@
4284      MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4285 
4286    Collective
4287 
4288    Input Parameters:
4289 +  mat - the matrix
4290 -  v - matrix values, stored by row
4291 
4292    Level: intermediate
4293 
4294    Note:
4295    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4296 
4297 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4299 @*/
4300 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4301 {
4302   PetscInt        nnz, i, m;
4303   PetscBool       nooffprocentries;
4304   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4305   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4306   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4307   PetscScalar    *ad, *ao;
4308   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4309   PetscInt        ldi, Iii, md;
4310   PetscInt       *ld = Aij->ld;
4311 
4312   PetscFunctionBegin;
4313   m = mat->rmap->n;
4314 
4315   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4316   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4317   Iii = 0;
4318   for (i = 0; i < m; i++) {
4319     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4320     ldi = ld[i];
4321     md  = Adi[i + 1] - Adi[i];
4322     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4323     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4324     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4325     ad += md;
4326     ao += nnz - md;
4327     Iii += nnz;
4328   }
4329   nooffprocentries      = mat->nooffprocentries;
4330   mat->nooffprocentries = PETSC_TRUE;
4331   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4332   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4333   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4334   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4335   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4336   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4337   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4338   mat->nooffprocentries = nooffprocentries;
4339   PetscFunctionReturn(PETSC_SUCCESS);
4340 }
4341 
4342 /*@C
4343    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4344    (the default parallel PETSc format).  For good matrix assembly performance
4345    the user should preallocate the matrix storage by setting the parameters
4346    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4347    performance can be increased by more than a factor of 50.
4348 
4349    Collective
4350 
4351    Input Parameters:
4352 +  comm - MPI communicator
4353 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4354            This value should be the same as the local size used in creating the
4355            y vector for the matrix-vector product y = Ax.
4356 .  n - This value should be the same as the local size used in creating the
4357        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4358        calculated if N is given) For square matrices n is almost always m.
4359 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4360 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4361 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4362            (same value is used for all local rows)
4363 .  d_nnz - array containing the number of nonzeros in the various rows of the
4364            DIAGONAL portion of the local submatrix (possibly different for each row)
4365            or NULL, if d_nz is used to specify the nonzero structure.
4366            The size of this array is equal to the number of local rows, i.e 'm'.
4367 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4368            submatrix (same value is used for all local rows).
4369 -  o_nnz - array containing the number of nonzeros in the various rows of the
4370            OFF-DIAGONAL portion of the local submatrix (possibly different for
4371            each row) or NULL, if o_nz is used to specify the nonzero
4372            structure. The size of this array is equal to the number
4373            of local rows, i.e 'm'.
4374 
4375    Output Parameter:
4376 .  A - the matrix
4377 
4378    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4379    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4380    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4381 
4382    Notes:
4383    If the *_nnz parameter is given then the *_nz parameter is ignored
4384 
4385    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4386    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4387    storage requirements for this matrix.
4388 
4389    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4391    that argument.
4392 
4393    The user MUST specify either the local or global matrix dimensions
4394    (possibly both).
4395 
4396    The parallel matrix is partitioned across processors such that the
4397    first m0 rows belong to process 0, the next m1 rows belong to
4398    process 1, the next m2 rows belong to process 2 etc.. where
4399    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4400    values corresponding to [m x N] submatrix.
4401 
4402    The columns are logically partitioned with the n0 columns belonging
4403    to 0th partition, the next n1 columns belonging to the next
4404    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4405 
4406    The DIAGONAL portion of the local submatrix on any given processor
4407    is the submatrix corresponding to the rows and columns m,n
4408    corresponding to the given processor. i.e diagonal matrix on
4409    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4410    etc. The remaining portion of the local submatrix [m x (N-n)]
4411    constitute the OFF-DIAGONAL portion. The example below better
4412    illustrates this concept.
4413 
4414    For a square global matrix we define each processor's diagonal portion
4415    to be its local rows and the corresponding columns (a square submatrix);
4416    each processor's off-diagonal portion encompasses the remainder of the
4417    local matrix (a rectangular submatrix).
4418 
4419    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4420 
4421    When calling this routine with a single process communicator, a matrix of
4422    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4423    type of communicator, use the construction mechanism
4424 .vb
4425      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4426 .ve
4427 
4428 $     MatCreate(...,&A);
4429 $     MatSetType(A,MATMPIAIJ);
4430 $     MatSetSizes(A, m,n,M,N);
4431 $     MatMPIAIJSetPreallocation(A,...);
4432 
4433    By default, this format uses inodes (identical nodes) when possible.
4434    We search for consecutive rows with the same nonzero structure, thereby
4435    reusing matrix information to achieve increased efficiency.
4436 
4437    Options Database Keys:
4438 +  -mat_no_inode  - Do not use inodes
4439 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4440 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4441         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4442         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4443 
4444    Example usage:
4445 
4446    Consider the following 8x8 matrix with 34 non-zero values, that is
4447    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4448    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4449    as follows
4450 
4451 .vb
4452             1  2  0  |  0  3  0  |  0  4
4453     Proc0   0  5  6  |  7  0  0  |  8  0
4454             9  0 10  | 11  0  0  | 12  0
4455     -------------------------------------
4456            13  0 14  | 15 16 17  |  0  0
4457     Proc1   0 18  0  | 19 20 21  |  0  0
4458             0  0  0  | 22 23  0  | 24  0
4459     -------------------------------------
4460     Proc2  25 26 27  |  0  0 28  | 29  0
4461            30  0  0  | 31 32 33  |  0 34
4462 .ve
4463 
4464    This can be represented as a collection of submatrices as
4465 
4466 .vb
4467       A B C
4468       D E F
4469       G H I
4470 .ve
4471 
4472    Where the submatrices A,B,C are owned by proc0, D,E,F are
4473    owned by proc1, G,H,I are owned by proc2.
4474 
4475    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4476    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4477    The 'M','N' parameters are 8,8, and have the same values on all procs.
4478 
4479    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4480    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4481    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4482    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4483    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4485 
4486    When d_nz, o_nz parameters are specified, d_nz storage elements are
4487    allocated for every row of the local diagonal submatrix, and o_nz
4488    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4490    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4491    In this case, the values of d_nz,o_nz are
4492 .vb
4493      proc0 : dnz = 2, o_nz = 2
4494      proc1 : dnz = 3, o_nz = 2
4495      proc2 : dnz = 1, o_nz = 4
4496 .ve
4497    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4498    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4500    34 values.
4501 
4502    When d_nnz, o_nnz parameters are specified, the storage is specified
4503    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4504    In the above case the values for d_nnz,o_nnz are
4505 .vb
4506      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4507      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4508      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4509 .ve
4510    Here the space allocated is sum of all the above values i.e 34, and
4511    hence pre-allocation is perfect.
4512 
4513    Level: intermediate
4514 
4515 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4516           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4517 @*/
4518 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4519 {
4520   PetscMPIInt size;
4521 
4522   PetscFunctionBegin;
4523   PetscCall(MatCreate(comm, A));
4524   PetscCall(MatSetSizes(*A, m, n, M, N));
4525   PetscCallMPI(MPI_Comm_size(comm, &size));
4526   if (size > 1) {
4527     PetscCall(MatSetType(*A, MATMPIAIJ));
4528     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4529   } else {
4530     PetscCall(MatSetType(*A, MATSEQAIJ));
4531     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4532   }
4533   PetscFunctionReturn(PETSC_SUCCESS);
4534 }
4535 
4536 /*MC
4537     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4538 
4539     Synopsis:
4540     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4541 
4542     Not Collective
4543 
4544     Input Parameter:
4545 .   A - the `MATMPIAIJ` matrix
4546 
4547     Output Parameters:
4548 +   Ad - the diagonal portion of the matrix
4549 .   Ao - the off diagonal portion of the matrix
4550 .   colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4551 -   ierr - error code
4552 
4553      Level: advanced
4554 
4555     Note:
4556     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4557 
4558 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4559 M*/
4560 
4561 /*MC
4562     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4563 
4564     Synopsis:
4565     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4566 
4567     Not Collective
4568 
4569     Input Parameters:
4570 +   A - the `MATMPIAIJ` matrix
4571 .   Ad - the diagonal portion of the matrix
4572 .   Ao - the off diagonal portion of the matrix
4573 .   colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4574 -   ierr - error code
4575 
4576      Level: advanced
4577 
4578 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4579 M*/
4580 
4581 /*@C
4582   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4583 
4584   Not collective
4585 
4586   Input Parameter:
4587 . A - The `MATMPIAIJ` matrix
4588 
4589   Output Parameters:
4590 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4591 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4592 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4593 
4594   Level: intermediate
4595 
4596   Note:
4597   The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4599   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4600   local column numbers to global column numbers in the original matrix.
4601 
4602   Fortran Note:
4603   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4604 
4605 .seealso: `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4606 @*/
4607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4608 {
4609   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4610   PetscBool   flg;
4611 
4612   PetscFunctionBegin;
4613   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4614   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4615   if (Ad) *Ad = a->A;
4616   if (Ao) *Ao = a->B;
4617   if (colmap) *colmap = a->garray;
4618   PetscFunctionReturn(PETSC_SUCCESS);
4619 }
4620 
/*
  MatCreateMPIMatConcatenateSeqMat_MPIAIJ - builds a parallel AIJ matrix on comm by
  stacking each rank's sequential matrix inmat on top of the next rank's, in rank order.

  Input Parameters:
+ comm   - communicator over which the result is distributed
. inmat  - this rank's sequential matrix; its rows become consecutive global rows
. n      - number of local columns of the result, or PETSC_DECIDE
- scall  - MAT_INITIAL_MATRIX to create outmat, MAT_REUSE_MATRIX to refill an existing one

  Output Parameter:
. outmat - the assembled parallel matrix

  Note: with MAT_REUSE_MATRIX, outmat must come from a previous MAT_INITIAL_MATRIX call
  with the same nonzero pattern (only the numeric phase below is executed).
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* first global row of this rank = total row count of all lower ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* only the call matching the actual type (seq vs mpi) takes effect; the other is a no-op */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: insert this rank's rows at their global positions */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4672 
4673 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4674 {
4675   PetscMPIInt        rank;
4676   PetscInt           m, N, i, rstart, nnz;
4677   size_t             len;
4678   const PetscInt    *indx;
4679   PetscViewer        out;
4680   char              *name;
4681   Mat                B;
4682   const PetscScalar *values;
4683 
4684   PetscFunctionBegin;
4685   PetscCall(MatGetLocalSize(A, &m, NULL));
4686   PetscCall(MatGetSize(A, NULL, &N));
4687   /* Should this be the type of the diagonal block of A? */
4688   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4689   PetscCall(MatSetSizes(B, m, N, m, N));
4690   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4691   PetscCall(MatSetType(B, MATSEQAIJ));
4692   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4693   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4694   for (i = 0; i < m; i++) {
4695     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4696     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4697     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4698   }
4699   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4700   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4701 
4702   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4703   PetscCall(PetscStrlen(outfile, &len));
4704   PetscCall(PetscMalloc1(len + 6, &name));
4705   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4706   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4707   PetscCall(PetscFree(name));
4708   PetscCall(MatView(B, out));
4709   PetscCall(PetscViewerDestroy(&out));
4710   PetscCall(MatDestroy(&B));
4711   PetscFunctionReturn(PETSC_SUCCESS);
4712 }
4713 
/* Destructor for the Mat_Merge_SeqsToMPI context attached (via a PetscContainer) to
   matrices built by MatCreateMPIAIJSumSeqAIJSymbolic(); frees all merge buffers and
   the row layout. Safe to call with data == NULL. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are pointer arrays whose per-message storage is one
     contiguous allocation anchored at entry [0] (PetscPostIrecv* convention) */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4736 
4737 #include <../src/mat/utils/freespace.h>
4738 #include <petscbt.h>
4739 
/*
  MatCreateMPIAIJSumSeqAIJNumeric - fills in the numerical values of mpimat by summing,
  across ranks, the entries of the per-rank sequential matrices seqmat.

  mpimat must have been created by MatCreateMPIAIJSumSeqAIJSymbolic(), which attached
  the Mat_Merge_SeqsToMPI communication context (row layout, merged ij-structure, and
  message bookkeeping) that this routine relies on.

  Collective
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge context created by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* send the values of all rows owned by proc, a contiguous run in aa */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge: both aj and bj_i are sorted, and aj's columns are a subset of bj_i's */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r's per-message buffers share one allocation anchored at [0] */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4860 
4861 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4862 {
4863   Mat                  B_mpi;
4864   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4865   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4866   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4867   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4868   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4869   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4870   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4871   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4872   MPI_Status          *status;
4873   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4874   PetscBT              lnkbt;
4875   Mat_Merge_SeqsToMPI *merge;
4876   PetscContainer       container;
4877 
4878   PetscFunctionBegin;
4879   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4880 
4881   /* make sure it is a PETSc comm */
4882   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4883   PetscCallMPI(MPI_Comm_size(comm, &size));
4884   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4885 
4886   PetscCall(PetscNew(&merge));
4887   PetscCall(PetscMalloc1(size, &status));
4888 
4889   /* determine row ownership */
4890   /*---------------------------------------------------------*/
4891   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4892   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4893   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4894   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4895   PetscCall(PetscLayoutSetUp(merge->rowmap));
4896   PetscCall(PetscMalloc1(size, &len_si));
4897   PetscCall(PetscMalloc1(size, &merge->len_s));
4898 
4899   m      = merge->rowmap->n;
4900   owners = merge->rowmap->range;
4901 
4902   /* determine the number of messages to send, their lengths */
4903   /*---------------------------------------------------------*/
4904   len_s = merge->len_s;
4905 
4906   len          = 0; /* length of buf_si[] */
4907   merge->nsend = 0;
4908   for (proc = 0; proc < size; proc++) {
4909     len_si[proc] = 0;
4910     if (proc == rank) {
4911       len_s[proc] = 0;
4912     } else {
4913       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4914       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4915     }
4916     if (len_s[proc]) {
4917       merge->nsend++;
4918       nrows = 0;
4919       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4920         if (ai[i + 1] > ai[i]) nrows++;
4921       }
4922       len_si[proc] = 2 * (nrows + 1);
4923       len += len_si[proc];
4924     }
4925   }
4926 
4927   /* determine the number and length of messages to receive for ij-structure */
4928   /*-------------------------------------------------------------------------*/
4929   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4930   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4931 
4932   /* post the Irecv of j-structure */
4933   /*-------------------------------*/
4934   PetscCall(PetscCommGetNewTag(comm, &tagj));
4935   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4936 
4937   /* post the Isend of j-structure */
4938   /*--------------------------------*/
4939   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4940 
4941   for (proc = 0, k = 0; proc < size; proc++) {
4942     if (!len_s[proc]) continue;
4943     i = owners[proc];
4944     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4945     k++;
4946   }
4947 
4948   /* receives and sends of j-structure are complete */
4949   /*------------------------------------------------*/
4950   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4951   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4952 
4953   /* send and recv i-structure */
4954   /*---------------------------*/
4955   PetscCall(PetscCommGetNewTag(comm, &tagi));
4956   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4957 
4958   PetscCall(PetscMalloc1(len + 1, &buf_s));
4959   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4960   for (proc = 0, k = 0; proc < size; proc++) {
4961     if (!len_s[proc]) continue;
4962     /* form outgoing message for i-structure:
4963          buf_si[0]:                 nrows to be sent
4964                [1:nrows]:           row index (global)
4965                [nrows+1:2*nrows+1]: i-structure index
4966     */
4967     /*-------------------------------------------*/
4968     nrows       = len_si[proc] / 2 - 1;
4969     buf_si_i    = buf_si + nrows + 1;
4970     buf_si[0]   = nrows;
4971     buf_si_i[0] = 0;
4972     nrows       = 0;
4973     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4974       anzi = ai[i + 1] - ai[i];
4975       if (anzi) {
4976         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4977         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4978         nrows++;
4979       }
4980     }
4981     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4982     k++;
4983     buf_si += len_si[proc];
4984   }
4985 
4986   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4987   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4988 
4989   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4990   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4991 
4992   PetscCall(PetscFree(len_si));
4993   PetscCall(PetscFree(len_ri));
4994   PetscCall(PetscFree(rj_waits));
4995   PetscCall(PetscFree2(si_waits, sj_waits));
4996   PetscCall(PetscFree(ri_waits));
4997   PetscCall(PetscFree(buf_s));
4998   PetscCall(PetscFree(status));
4999 
5000   /* compute a local seq matrix in each processor */
5001   /*----------------------------------------------*/
5002   /* allocate bi array and free space for accumulating nonzero column info */
5003   PetscCall(PetscMalloc1(m + 1, &bi));
5004   bi[0] = 0;
5005 
5006   /* create and initialize a linked list */
5007   nlnk = N + 1;
5008   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
5009 
5010   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5011   len = ai[owners[rank + 1]] - ai[owners[rank]];
5012   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
5013 
5014   current_space = free_space;
5015 
5016   /* determine symbolic info for each local row */
5017   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
5018 
5019   for (k = 0; k < merge->nrecv; k++) {
5020     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5021     nrows       = *buf_ri_k[k];
5022     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5023     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5024   }
5025 
5026   MatPreallocateBegin(comm, m, n, dnz, onz);
5027   len = 0;
5028   for (i = 0; i < m; i++) {
5029     bnzi = 0;
5030     /* add local non-zero cols of this proc's seqmat into lnk */
5031     arow = owners[rank] + i;
5032     anzi = ai[arow + 1] - ai[arow];
5033     aj   = a->j + ai[arow];
5034     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5035     bnzi += nlnk;
5036     /* add received col data into lnk */
5037     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5038       if (i == *nextrow[k]) {            /* i-th row */
5039         anzi = *(nextai[k] + 1) - *nextai[k];
5040         aj   = buf_rj[k] + *nextai[k];
5041         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5042         bnzi += nlnk;
5043         nextrow[k]++;
5044         nextai[k]++;
5045       }
5046     }
5047     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5048 
5049     /* if free space is not available, make more free space */
5050     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5051     /* copy data into free space, then initialize lnk */
5052     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5053     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5054 
5055     current_space->array += bnzi;
5056     current_space->local_used += bnzi;
5057     current_space->local_remaining -= bnzi;
5058 
5059     bi[i + 1] = bi[i] + bnzi;
5060   }
5061 
5062   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5063 
5064   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5065   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5066   PetscCall(PetscLLDestroy(lnk, lnkbt));
5067 
5068   /* create symbolic parallel matrix B_mpi */
5069   /*---------------------------------------*/
5070   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5071   PetscCall(MatCreate(comm, &B_mpi));
5072   if (n == PETSC_DECIDE) {
5073     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5074   } else {
5075     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5076   }
5077   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5078   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5079   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5080   MatPreallocateEnd(dnz, onz);
5081   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5082 
5083   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5084   B_mpi->assembled = PETSC_FALSE;
5085   merge->bi        = bi;
5086   merge->bj        = bj;
5087   merge->buf_ri    = buf_ri;
5088   merge->buf_rj    = buf_rj;
5089   merge->coi       = NULL;
5090   merge->coj       = NULL;
5091   merge->owners_co = NULL;
5092 
5093   PetscCall(PetscCommDestroy(&comm));
5094 
5095   /* attach the supporting struct to B_mpi for reuse */
5096   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5097   PetscCall(PetscContainerSetPointer(container, merge));
5098   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5099   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5100   PetscCall(PetscContainerDestroy(&container));
5101   *mpimat = B_mpi;
5102 
5103   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5104   PetscFunctionReturn(PETSC_SUCCESS);
5105 }
5106 
5107 /*@C
5108       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5109                  matrices from each processor
5110 
5111     Collective
5112 
5113    Input Parameters:
5114 +    comm - the communicator the parallel matrix will live on
5115 .    seqmat - the input sequential matrix on each process
5116 .    m - number of local rows (or `PETSC_DECIDE`)
5117 .    n - number of local columns (or `PETSC_DECIDE`)
5118 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5119 
5120    Output Parameter:
5121 .    mpimat - the parallel matrix generated
5122 
5123     Level: advanced
5124 
5125    Note:
5126      The dimensions of the sequential matrix in each processor MUST be the same.
5127      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5128      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5129 @*/
5130 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5131 {
5132   PetscMPIInt size;
5133 
5134   PetscFunctionBegin;
5135   PetscCallMPI(MPI_Comm_size(comm, &size));
5136   if (size == 1) {
5137     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5138     if (scall == MAT_INITIAL_MATRIX) {
5139       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5140     } else {
5141       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5142     }
5143     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5144     PetscFunctionReturn(PETSC_SUCCESS);
5145   }
5146   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5147   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5148   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5149   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5150   PetscFunctionReturn(PETSC_SUCCESS);
5151 }
5152 
5153 /*@
5154      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5155           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5156           with `MatGetSize()`
5157 
5158     Not Collective
5159 
5160    Input Parameter:
5161 .    A - the matrix
5163 
5164    Output Parameter:
5165 .    A_loc - the local sequential matrix generated
5166 
5167     Level: developer
5168 
5169    Notes:
5170      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5171 
5172      Destroy the matrix with `MatDestroy()`
5173 
5174 .seealso: `MatMPIAIJGetLocalMat()`
5175 @*/
5176 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5177 {
5178   PetscBool mpi;
5179 
5180   PetscFunctionBegin;
5181   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5182   if (mpi) {
5183     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5184   } else {
5185     *A_loc = A;
5186     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5187   }
5188   PetscFunctionReturn(PETSC_SUCCESS);
5189 }
5190 
5191 /*@
5192      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5193           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5194           with `MatGetSize()`
5195 
5196     Not Collective
5197 
5198    Input Parameters:
5199 +    A - the matrix
5200 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5201 
5202    Output Parameter:
5203 .    A_loc - the local sequential matrix generated
5204 
5205     Level: developer
5206 
5207    Notes:
5208      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5209 
5210      When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
5211      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
5212      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5213      modify the values of the returned A_loc.
5214 
5215 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5216 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diagonal column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and derived types: prefix match on the type name */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single process: the diagonal block already holds the entire local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  /* a = diagonal block, b = off-diagonal block; grab their CSR index arrays and values */
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* merged row i has all diagonal plus all off-diagonal entries of row i;
       each row is emitted in ascending global column order: off-diagonal columns
       below cstart first, then the diagonal block, then the remaining
       off-diagonal columns */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A with global column < cstart */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (local columns shifted to global by cstart) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (global column >= cstart + local width) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* the nonzero pattern is assumed unchanged: replay the same interleaving
       order as the initial call, copying only the numerical values */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5321 
5322 /*@
5323      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5324           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5325 
5326     Not Collective
5327 
5328    Input Parameters:
5329 +    A - the matrix
5330 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5331 
5332    Output Parameters:
5333 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5334 -    A_loc - the local sequential matrix generated
5335 
5336     Level: developer
5337 
5338    Note:
5339      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering)
5340 
5341 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5342 @*/
5343 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5344 {
5345   Mat             Ao, Ad;
5346   const PetscInt *cmap;
5347   PetscMPIInt     size;
5348   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5349 
5350   PetscFunctionBegin;
5351   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5352   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5353   if (size == 1) {
5354     if (scall == MAT_INITIAL_MATRIX) {
5355       PetscCall(PetscObjectReference((PetscObject)Ad));
5356       *A_loc = Ad;
5357     } else if (scall == MAT_REUSE_MATRIX) {
5358       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5359     }
5360     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5361     PetscFunctionReturn(PETSC_SUCCESS);
5362   }
5363   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5364   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5365   if (f) {
5366     PetscCall((*f)(A, scall, glob, A_loc));
5367   } else {
5368     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5369     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5370     Mat_SeqAIJ        *c;
5371     PetscInt          *ai = a->i, *aj = a->j;
5372     PetscInt          *bi = b->i, *bj = b->j;
5373     PetscInt          *ci, *cj;
5374     const PetscScalar *aa, *ba;
5375     PetscScalar       *ca;
5376     PetscInt           i, j, am, dn, on;
5377 
5378     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5379     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5380     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5381     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5382     if (scall == MAT_INITIAL_MATRIX) {
5383       PetscInt k;
5384       PetscCall(PetscMalloc1(1 + am, &ci));
5385       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5386       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5387       ci[0] = 0;
5388       for (i = 0, k = 0; i < am; i++) {
5389         const PetscInt ncols_o = bi[i + 1] - bi[i];
5390         const PetscInt ncols_d = ai[i + 1] - ai[i];
5391         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5392         /* diagonal portion of A */
5393         for (j = 0; j < ncols_d; j++, k++) {
5394           cj[k] = *aj++;
5395           ca[k] = *aa++;
5396         }
5397         /* off-diagonal portion of A */
5398         for (j = 0; j < ncols_o; j++, k++) {
5399           cj[k] = dn + *bj++;
5400           ca[k] = *ba++;
5401         }
5402       }
5403       /* put together the new matrix */
5404       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5405       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5406       /* Since these are PETSc arrays, change flags to free them as necessary. */
5407       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5408       c->free_a  = PETSC_TRUE;
5409       c->free_ij = PETSC_TRUE;
5410       c->nonew   = 0;
5411       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5412     } else if (scall == MAT_REUSE_MATRIX) {
5413       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5414       for (i = 0; i < am; i++) {
5415         const PetscInt ncols_d = ai[i + 1] - ai[i];
5416         const PetscInt ncols_o = bi[i + 1] - bi[i];
5417         /* diagonal portion of A */
5418         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5419         /* off-diagonal portion of A */
5420         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5421       }
5422       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5423     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5424     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5425     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5426     if (glob) {
5427       PetscInt cst, *gidx;
5428 
5429       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5430       PetscCall(PetscMalloc1(dn + on, &gidx));
5431       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5432       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5433       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5434     }
5435   }
5436   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5437   PetscFunctionReturn(PETSC_SUCCESS);
5438 }
5439 
5440 /*@C
5441      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5442 
5443     Not Collective
5444 
5445    Input Parameters:
5446 +    A - the matrix
5447 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5448 -    row, col - index sets of rows and columns to extract (or NULL)
5449 
5450    Output Parameter:
5451 .    A_loc - the local sequential matrix generated
5452 
5453     Level: developer
5454 
5455 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5456 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: only columns with local nonzeros, in ascending global
       order -- off-diagonal columns below cstart, then the owned (diagonal
       block) columns, then the remaining off-diagonal columns; assumes
       garray is sorted */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* destroy only the index sets this routine created itself */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5509 
5510 /*
5511  * Create a sequential AIJ matrix based on row indices: a whole row is extracted once its index is matched.
5512  * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5513  * on a global size.
5514  */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows)
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns there are for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-row counts and running offsets, interleaved as (diag, off-diag) pairs */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  /* total nonzeros to receive for the diagonal (dntotalcols) and off-diagonal (ontotalcols)
     parts, and the widest row (ncol) used only as the column bound for the new matrix */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* build two SF graphs that gather the nonzero entries themselves:
     one for the diagonal part and one for the off-diagonal part */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix; undone again below after the broadcast */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* translate off-diagonal column indices to global in place; mapped back below */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* restore po->j to local indices; every global index must map back (IS_GTOLM_DROP drops none here) */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse them later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5683 
5684 /*
5685  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5686  * This supports MPIAIJ and MAIJ
5687  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp; /* maps a (block) row index of P, i.e. a->garray[i]/dof, to its position in the unique-row list */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf; /* star forests used in the reuse path to refresh values from P's diag/offdiag blocks */
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* with dof > 1, several consecutive off-diag columns of A map to the same block row of P */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step (garray is sorted), so it maps to the last inserted row */
        mapping[i] = count - 1;
      }
    }
    /* mapping[] (off-diag column of A -> local row of *P_oth) is handed to the IS via PETSC_OWN_POINTER */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices)); /* hash keys come out unordered; sort to obtain ascending global row indices */
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    /* NOTE(review): presumably this composes the "diagsf"/"offdiagsf" SFs on *P_oth that the reuse path queries below — confirm */
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that as attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    /* Both broadcasts target p_oth->a; overlapping them assumes the two SFs write disjoint entries — presumably diag vs offdiag slots, verify */
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5760 
/*@C
  MatGetBrowsOfAcols - Returns an `IS` that contains the rows of B that correspond to nonzero columns of local A

  Collective

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or NULL), modified on output
. colb - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Level: developer

@*/
5779 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5780 {
5781   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5782   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5783   IS          isrowb, iscolb;
5784   Mat        *bseq = NULL;
5785 
5786   PetscFunctionBegin;
5787   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5788     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5789   }
5790   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5791 
5792   if (scall == MAT_INITIAL_MATRIX) {
5793     start = A->cmap->rstart;
5794     cmap  = a->garray;
5795     nzA   = a->A->cmap->n;
5796     nzB   = a->B->cmap->n;
5797     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5798     ncols = 0;
5799     for (i = 0; i < nzB; i++) { /* row < local row index */
5800       if (cmap[i] < start) idx[ncols++] = cmap[i];
5801       else break;
5802     }
5803     imark = i;
5804     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5805     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5806     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5807     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5808   } else {
5809     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5810     isrowb = *rowb;
5811     iscolb = *colb;
5812     PetscCall(PetscMalloc1(1, &bseq));
5813     bseq[0] = *B_seq;
5814   }
5815   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5816   *B_seq = bseq[0];
5817   PetscCall(PetscFree(bseq));
5818   if (!rowb) {
5819     PetscCall(ISDestroy(&isrowb));
5820   } else {
5821     *rowb = isrowb;
5822   }
5823   if (!colb) {
5824     PetscCall(ISDestroy(&iscolb));
5825   } else {
5826     *colb = iscolb;
5827   }
5828   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5829   PetscFunctionReturn(PETSC_SUCCESS);
5830 }
5831 
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Note:
    This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.

    Level: developer

*/
5855 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5856 {
5857   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5858   Mat_SeqAIJ        *b_oth;
5859   VecScatter         ctx;
5860   MPI_Comm           comm;
5861   const PetscMPIInt *rprocs, *sprocs;
5862   const PetscInt    *srow, *rstarts, *sstarts;
5863   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5864   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5865   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5866   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5867   PetscMPIInt        size, tag, rank, nreqs;
5868 
5869   PetscFunctionBegin;
5870   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5871   PetscCallMPI(MPI_Comm_size(comm, &size));
5872 
5873   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5874     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5875   }
5876   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5877   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5878 
5879   if (size == 1) {
5880     startsj_s = NULL;
5881     bufa_ptr  = NULL;
5882     *B_oth    = NULL;
5883     PetscFunctionReturn(PETSC_SUCCESS);
5884   }
5885 
5886   ctx = a->Mvctx;
5887   tag = ((PetscObject)ctx)->tag;
5888 
5889   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5890   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5891   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5892   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5893   PetscCall(PetscMalloc1(nreqs, &reqs));
5894   rwaits = reqs;
5895   swaits = reqs + nrecvs;
5896 
5897   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5898   if (scall == MAT_INITIAL_MATRIX) {
5899     /* i-array */
5900     /*---------*/
5901     /*  post receives */
5902     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5903     for (i = 0; i < nrecvs; i++) {
5904       rowlen = rvalues + rstarts[i] * rbs;
5905       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5906       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5907     }
5908 
5909     /* pack the outgoing message */
5910     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5911 
5912     sstartsj[0] = 0;
5913     rstartsj[0] = 0;
5914     len         = 0; /* total length of j or a array to be sent */
5915     if (nsends) {
5916       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5917       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5918     }
5919     for (i = 0; i < nsends; i++) {
5920       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5921       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5922       for (j = 0; j < nrows; j++) {
5923         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5924         for (l = 0; l < sbs; l++) {
5925           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5926 
5927           rowlen[j * sbs + l] = ncols;
5928 
5929           len += ncols;
5930           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5931         }
5932         k++;
5933       }
5934       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5935 
5936       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5937     }
5938     /* recvs and sends of i-array are completed */
5939     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5940     PetscCall(PetscFree(svalues));
5941 
5942     /* allocate buffers for sending j and a arrays */
5943     PetscCall(PetscMalloc1(len + 1, &bufj));
5944     PetscCall(PetscMalloc1(len + 1, &bufa));
5945 
5946     /* create i-array of B_oth */
5947     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5948 
5949     b_othi[0] = 0;
5950     len       = 0; /* total length of j or a array to be received */
5951     k         = 0;
5952     for (i = 0; i < nrecvs; i++) {
5953       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5954       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5955       for (j = 0; j < nrows; j++) {
5956         b_othi[k + 1] = b_othi[k] + rowlen[j];
5957         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5958         k++;
5959       }
5960       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5961     }
5962     PetscCall(PetscFree(rvalues));
5963 
5964     /* allocate space for j and a arrays of B_oth */
5965     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5966     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5967 
5968     /* j-array */
5969     /*---------*/
5970     /*  post receives of j-array */
5971     for (i = 0; i < nrecvs; i++) {
5972       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5973       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5974     }
5975 
5976     /* pack the outgoing message j-array */
5977     if (nsends) k = sstarts[0];
5978     for (i = 0; i < nsends; i++) {
5979       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5980       bufJ  = bufj + sstartsj[i];
5981       for (j = 0; j < nrows; j++) {
5982         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5983         for (ll = 0; ll < sbs; ll++) {
5984           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5985           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5986           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5987         }
5988       }
5989       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5990     }
5991 
5992     /* recvs and sends of j-array are completed */
5993     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5994   } else if (scall == MAT_REUSE_MATRIX) {
5995     sstartsj = *startsj_s;
5996     rstartsj = *startsj_r;
5997     bufa     = *bufa_ptr;
5998     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5999     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
6000   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
6001 
6002   /* a-array */
6003   /*---------*/
6004   /*  post receives of a-array */
6005   for (i = 0; i < nrecvs; i++) {
6006     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
6007     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
6008   }
6009 
6010   /* pack the outgoing message a-array */
6011   if (nsends) k = sstarts[0];
6012   for (i = 0; i < nsends; i++) {
6013     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
6014     bufA  = bufa + sstartsj[i];
6015     for (j = 0; j < nrows; j++) {
6016       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6017       for (ll = 0; ll < sbs; ll++) {
6018         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6019         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6020         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6021       }
6022     }
6023     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6024   }
6025   /* recvs and sends of a-array are completed */
6026   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6027   PetscCall(PetscFree(reqs));
6028 
6029   if (scall == MAT_INITIAL_MATRIX) {
6030     /* put together the new matrix */
6031     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6032 
6033     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6034     /* Since these are PETSc arrays, change flags to free them as necessary. */
6035     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6036     b_oth->free_a  = PETSC_TRUE;
6037     b_oth->free_ij = PETSC_TRUE;
6038     b_oth->nonew   = 0;
6039 
6040     PetscCall(PetscFree(bufj));
6041     if (!startsj_s || !bufa_ptr) {
6042       PetscCall(PetscFree2(sstartsj, rstartsj));
6043       PetscCall(PetscFree(bufa_ptr));
6044     } else {
6045       *startsj_s = sstartsj;
6046       *startsj_r = rstartsj;
6047       *bufa_ptr  = bufa;
6048     }
6049   } else if (scall == MAT_REUSE_MATRIX) {
6050     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6051   }
6052 
6053   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6054   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6055   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6056   PetscFunctionReturn(PETSC_SUCCESS);
6057 }
6058 
6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6062 #if defined(PETSC_HAVE_MKL_SPARSE)
6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6064 #endif
6065 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6067 #if defined(PETSC_HAVE_ELEMENTAL)
6068 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6069 #endif
6070 #if defined(PETSC_HAVE_SCALAPACK)
6071 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6072 #endif
6073 #if defined(PETSC_HAVE_HYPRE)
6074 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6075 #endif
6076 #if defined(PETSC_HAVE_CUDA)
6077 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6078 #endif
6079 #if defined(PETSC_HAVE_HIP)
6080 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6081 #endif
6082 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6083 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6084 #endif
6085 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6086 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6087 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6088 
6089 /*
6090     Computes (B'*A')' since computing B*A directly is untenable
6091 
6092                n                       p                          p
6093         [             ]       [             ]         [                 ]
6094       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6095         [             ]       [             ]         [                 ]
6096 
6097 */
6098 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6099 {
6100   Mat At, Bt, Ct;
6101 
6102   PetscFunctionBegin;
6103   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6104   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6105   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6106   PetscCall(MatDestroy(&At));
6107   PetscCall(MatDestroy(&Bt));
6108   PetscCall(MatTransposeSetPrecursor(Ct, C));
6109   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6110   PetscCall(MatDestroy(&Ct));
6111   PetscFunctionReturn(PETSC_SUCCESS);
6112 }
6113 
6114 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6115 {
6116   PetscBool cisdense;
6117 
6118   PetscFunctionBegin;
6119   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6120   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6121   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6122   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6123   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6124   PetscCall(MatSetUp(C));
6125 
6126   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6127   PetscFunctionReturn(PETSC_SUCCESS);
6128 }
6129 
6130 /* ----------------------------------------------------------------*/
6131 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6132 {
6133   Mat_Product *product = C->product;
6134   Mat          A = product->A, B = product->B;
6135 
6136   PetscFunctionBegin;
6137   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6138     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6139 
6140   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6141   C->ops->productsymbolic = MatProductSymbolic_AB;
6142   PetscFunctionReturn(PETSC_SUCCESS);
6143 }
6144 
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  /* Only the AB product (C = A*B) is supported for MPIDense * MPIAIJ; other product types leave C unconfigured */
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6153 
6154 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6155 
6156   Input Parameters:
6157 
6158     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6159     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6160 
6161     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6162 
6163     For Set1, j1[] contains column indices of the nonzeros.
6164     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6166     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6167 
6168     Similar for Set2.
6169 
6170     This routine merges the two sets of nonzeros row by row and removes repeats.
6171 
6172   Output Parameters: (memory is allocated by the caller)
6173 
6174     i[],j[]: the CSR of the merged matrix, which has m rows.
6175     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6176     imap2[]: similar to imap1[], but for Set2.
6177     Note we order nonzeros row-by-row and from left to right.
6178 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Two-pointer merge of the sorted (possibly repeated) column indices; b1/b2 advance by the
       repeat count jmap[t+1]-jmap[t] so each unique nonzero is visited exactly once */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Nonzero present only in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Nonzero present only in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: number of unique nonzeros seen through row r */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6236 
6237 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6238 
6239   Input Parameters:
6240     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6241     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6242       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6243 
6244       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6245       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6246 
6247   Output Parameters:
6248     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6249     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6250       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6251       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6252 
6253     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6254       Atot: number of entries belonging to the diagonal block.
6255       Annz: number of unique nonzeros belonging to the diagonal block.
6256       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6257         repeats (i.e., same 'i,j' pair).
6258       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6259         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6260 
6261       Atot: number of entries belonging to the diagonal block
6262       Annz: number of unique nonzeros belonging to the diagonal block.
6263 
6264     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6265 
6266     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6267 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* i[] is sorted, so entries with negative rows (to be ignored per the contract above) come first */
  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sort this row's column indices (shifted diag entries sort before offdiag), carrying perm[] along */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row; each inner do-while consumes one run of repeats.
       Note col holds the shifted value, and j[p] += PETSC_MAX_INT restores the true diag column index */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT;
        p++;
      } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Reuse the counters as running offsets in the second pass */
  for (r = 0; r < m; r++) {
    /* Rows with no entries keep rowBegin/rowMid/rowEnd at 0 (caller zeroed them), contributing nothing here */
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6376 
6377 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6378 
6379   Input Parameters:
6380     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6381     nnz:  number of unique nonzeros in the merged matrix
6382     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6384 
6385   Output Parameter: (memory is allocated by the caller)
6386     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6387 
6388   Example:
6389     nnz1 = 4
6390     nnz  = 6
6391     imap = [1,3,4,5]
6392     jmap = [0,3,5,6,7]
6393    then,
6394     jmap_new = [0,0,3,3,5,6,7]
6395 */
6396 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6397 {
6398   PetscCount k, p;
6399 
6400   PetscFunctionBegin;
6401   jmap_new[0] = 0;
6402   p           = nnz;                /* p loops over jmap_new[] backwards */
6403   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6404     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6405   }
6406   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6407   PetscFunctionReturn(PETSC_SUCCESS);
6408 }
6409 
6410 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6411 {
6412   MPI_Comm    comm;
6413   PetscMPIInt rank, size;
6414   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6415   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6416   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6417 
6418   PetscFunctionBegin;
6419   PetscCall(PetscFree(mpiaij->garray));
6420   PetscCall(VecDestroy(&mpiaij->lvec));
6421 #if defined(PETSC_USE_CTABLE)
6422   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6423 #else
6424   PetscCall(PetscFree(mpiaij->colmap));
6425 #endif
6426   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6427   mat->assembled     = PETSC_FALSE;
6428   mat->was_assembled = PETSC_FALSE;
6429   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6430 
6431   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6432   PetscCallMPI(MPI_Comm_size(comm, &size));
6433   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6434   PetscCall(PetscLayoutSetUp(mat->rmap));
6435   PetscCall(PetscLayoutSetUp(mat->cmap));
6436   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6437   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6438   PetscCall(MatGetLocalSize(mat, &m, &n));
6439   PetscCall(MatGetSize(mat, &M, &N));
6440 
6441   /* ---------------------------------------------------------------------------*/
6442   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6443   /* entries come first, then local rows, then remote rows.                     */
6444   /* ---------------------------------------------------------------------------*/
6445   PetscCount n1 = coo_n, *perm1;
6446   PetscInt  *i1 = coo_i, *j1 = coo_j;
6447 
6448   PetscCall(PetscMalloc1(n1, &perm1));
6449   for (k = 0; k < n1; k++) perm1[k] = k;
6450 
6451   /* Manipulate indices so that entries with negative row or col indices will have smallest
6452      row indices, local entries will have greater but negative row indices, and remote entries
6453      will have positive row indices.
6454   */
6455   for (k = 0; k < n1; k++) {
6456     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6457     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6458     else {
6459       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6460       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6461     }
6462   }
6463 
6464   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6465   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6466   for (k = 0; k < n1; k++) {
6467     if (i1[k] > PETSC_MIN_INT) break;
6468   }                                                                               /* Advance k to the first entry we need to take care of */
6469   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6470   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6471 
6472   /* ---------------------------------------------------------------------------*/
6473   /*           Split local rows into diag/offdiag portions                      */
6474   /* ---------------------------------------------------------------------------*/
6475   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6476   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6477   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6478 
6479   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6480   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6481   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6482 
6483   /* ---------------------------------------------------------------------------*/
6484   /*           Send remote rows to their owner                                  */
6485   /* ---------------------------------------------------------------------------*/
6486   /* Find which rows should be sent to which remote ranks*/
6487   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6488   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6489   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6490   const PetscInt *ranges;
6491   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6492 
6493   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6494   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6495   for (k = rem; k < n1;) {
6496     PetscMPIInt owner;
6497     PetscInt    firstRow, lastRow;
6498 
6499     /* Locate a row range */
6500     firstRow = i1[k]; /* first row of this owner */
6501     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6502     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6503 
6504     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6505     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6506 
6507     /* All entries in [k,p) belong to this remote owner */
6508     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6509       PetscMPIInt *sendto2;
6510       PetscInt    *nentries2;
6511       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6512 
6513       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6514       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6515       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6516       PetscCall(PetscFree2(sendto, nentries2));
6517       sendto   = sendto2;
6518       nentries = nentries2;
6519       maxNsend = maxNsend2;
6520     }
6521     sendto[nsend]   = owner;
6522     nentries[nsend] = p - k;
6523     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6524     nsend++;
6525     k = p;
6526   }
6527 
6528   /* Build 1st SF to know offsets on remote to send data */
6529   PetscSF      sf1;
6530   PetscInt     nroots = 1, nroots2 = 0;
6531   PetscInt     nleaves = nsend, nleaves2 = 0;
6532   PetscInt    *offsets;
6533   PetscSFNode *iremote;
6534 
6535   PetscCall(PetscSFCreate(comm, &sf1));
6536   PetscCall(PetscMalloc1(nsend, &iremote));
6537   PetscCall(PetscMalloc1(nsend, &offsets));
6538   for (k = 0; k < nsend; k++) {
6539     iremote[k].rank  = sendto[k];
6540     iremote[k].index = 0;
6541     nleaves2 += nentries[k];
6542     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6543   }
6544   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6545   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6546   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6547   PetscCall(PetscSFDestroy(&sf1));
6548   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6549 
6550   /* Build 2nd SF to send remote COOs to their owner */
6551   PetscSF sf2;
6552   nroots  = nroots2;
6553   nleaves = nleaves2;
6554   PetscCall(PetscSFCreate(comm, &sf2));
6555   PetscCall(PetscSFSetFromOptions(sf2));
6556   PetscCall(PetscMalloc1(nleaves, &iremote));
6557   p = 0;
6558   for (k = 0; k < nsend; k++) {
6559     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6560     for (q = 0; q < nentries[k]; q++, p++) {
6561       iremote[p].rank  = sendto[k];
6562       iremote[p].index = offsets[k] + q;
6563     }
6564   }
6565   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6566 
6567   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6568   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6569 
6570   /* Send the remote COOs to their owner */
6571   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6572   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6573   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6574   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6575   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6576   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6577   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6578 
6579   PetscCall(PetscFree(offsets));
6580   PetscCall(PetscFree2(sendto, nentries));
6581 
6582   /* ---------------------------------------------------------------*/
6583   /* Sort received COOs by row along with the permutation array     */
6584   /* ---------------------------------------------------------------*/
6585   for (k = 0; k < n2; k++) perm2[k] = k;
6586   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6587 
6588   /* ---------------------------------------------------------------*/
6589   /* Split received COOs into diag/offdiag portions                 */
6590   /* ---------------------------------------------------------------*/
6591   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6592   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6593   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6594 
6595   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6596   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6597 
6598   /* --------------------------------------------------------------------------*/
6599   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6600   /* --------------------------------------------------------------------------*/
6601   PetscInt *Ai, *Bi;
6602   PetscInt *Aj, *Bj;
6603 
6604   PetscCall(PetscMalloc1(m + 1, &Ai));
6605   PetscCall(PetscMalloc1(m + 1, &Bi));
6606   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6607   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6608 
6609   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6610   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6611   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6612   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6613   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6614 
6615   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6616   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6617 
6618   /* --------------------------------------------------------------------------*/
6619   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6620   /* expect nonzeros in A/B most likely have local contributing entries        */
6621   /* --------------------------------------------------------------------------*/
6622   PetscInt    Annz = Ai[m];
6623   PetscInt    Bnnz = Bi[m];
6624   PetscCount *Ajmap1_new, *Bjmap1_new;
6625 
6626   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6627   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6628 
6629   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6630   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6631 
6632   PetscCall(PetscFree(Aimap1));
6633   PetscCall(PetscFree(Ajmap1));
6634   PetscCall(PetscFree(Bimap1));
6635   PetscCall(PetscFree(Bjmap1));
6636   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6637   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6638   PetscCall(PetscFree(perm1));
6639   PetscCall(PetscFree3(i2, j2, perm2));
6640 
6641   Ajmap1 = Ajmap1_new;
6642   Bjmap1 = Bjmap1_new;
6643 
6644   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6645   if (Annz < Annz1 + Annz2) {
6646     PetscInt *Aj_new;
6647     PetscCall(PetscMalloc1(Annz, &Aj_new));
6648     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6649     PetscCall(PetscFree(Aj));
6650     Aj = Aj_new;
6651   }
6652 
6653   if (Bnnz < Bnnz1 + Bnnz2) {
6654     PetscInt *Bj_new;
6655     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6656     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6657     PetscCall(PetscFree(Bj));
6658     Bj = Bj_new;
6659   }
6660 
6661   /* --------------------------------------------------------------------------------*/
6662   /* Create new submatrices for on-process and off-process coupling                  */
6663   /* --------------------------------------------------------------------------------*/
6664   PetscScalar *Aa, *Ba;
6665   MatType      rtype;
6666   Mat_SeqAIJ  *a, *b;
6667   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6668   PetscCall(PetscCalloc1(Bnnz, &Ba));
6669   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6670   if (cstart) {
6671     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6672   }
6673   PetscCall(MatDestroy(&mpiaij->A));
6674   PetscCall(MatDestroy(&mpiaij->B));
6675   PetscCall(MatGetRootType_Private(mat, &rtype));
6676   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6677   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6678   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6679 
6680   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6681   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6682   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6683   a->free_a = b->free_a = PETSC_TRUE;
6684   a->free_ij = b->free_ij = PETSC_TRUE;
6685 
6686   /* conversion must happen AFTER multiply setup */
6687   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6688   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6689   PetscCall(VecDestroy(&mpiaij->lvec));
6690   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6691 
6692   mpiaij->coo_n   = coo_n;
6693   mpiaij->coo_sf  = sf2;
6694   mpiaij->sendlen = nleaves;
6695   mpiaij->recvlen = nroots;
6696 
6697   mpiaij->Annz = Annz;
6698   mpiaij->Bnnz = Bnnz;
6699 
6700   mpiaij->Annz2 = Annz2;
6701   mpiaij->Bnnz2 = Bnnz2;
6702 
6703   mpiaij->Atot1 = Atot1;
6704   mpiaij->Atot2 = Atot2;
6705   mpiaij->Btot1 = Btot1;
6706   mpiaij->Btot2 = Btot2;
6707 
6708   mpiaij->Ajmap1 = Ajmap1;
6709   mpiaij->Aperm1 = Aperm1;
6710 
6711   mpiaij->Bjmap1 = Bjmap1;
6712   mpiaij->Bperm1 = Bperm1;
6713 
6714   mpiaij->Aimap2 = Aimap2;
6715   mpiaij->Ajmap2 = Ajmap2;
6716   mpiaij->Aperm2 = Aperm2;
6717 
6718   mpiaij->Bimap2 = Bimap2;
6719   mpiaij->Bjmap2 = Bjmap2;
6720   mpiaij->Bperm2 = Bperm2;
6721 
6722   mpiaij->Cperm1 = Cperm1;
6723 
6724   /* Allocate in preallocation. If not used, it has zero cost on host */
6725   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6726   PetscFunctionReturn(PETSC_SUCCESS);
6727 }
6728 
6729 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6730 {
6731   Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
6732   Mat               A = mpiaij->A, B = mpiaij->B;
6733   PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
6734   PetscScalar      *Aa, *Ba;
6735   PetscScalar      *sendbuf = mpiaij->sendbuf;
6736   PetscScalar      *recvbuf = mpiaij->recvbuf;
6737   const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
6738   const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
6739   const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
6740   const PetscCount *Cperm1 = mpiaij->Cperm1;
6741 
6742   PetscFunctionBegin;
6743   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6744   PetscCall(MatSeqAIJGetArray(B, &Ba));
6745 
6746   /* Pack entries to be sent to remote */
6747   for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6748 
6749   /* Send remote entries to their owner and overlap the communication with local computation */
6750   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6751   /* Add local entries to A and B */
6752   for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6753     PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
6754     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6755     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6756   }
6757   for (PetscCount i = 0; i < Bnnz; i++) {
6758     PetscScalar sum = 0.0;
6759     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6760     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6761   }
6762   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6763 
6764   /* Add received remote entries to A and B */
6765   for (PetscCount i = 0; i < Annz2; i++) {
6766     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6767   }
6768   for (PetscCount i = 0; i < Bnnz2; i++) {
6769     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6770   }
6771   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6772   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6773   PetscFunctionReturn(PETSC_SUCCESS);
6774 }
6775 
6776 /* ----------------------------------------------------------------*/
6777 
6778 /*MC
6779    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6780 
6781    Options Database Keys:
6782 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6783 
6784    Level: beginner
6785 
6786    Notes:
6787     `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
6788     in this case the values associated with the rows and columns one passes in are set to zero
6789     in the matrix
6790 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6793 
6794 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6795 M*/
6796 
/* Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ implementation data,
   installs the operations table, and registers the named plugin functions
   (preallocation, conversions, products, COO assembly) queried elsewhere via
   PetscObjectQueryFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register named implementations; the MatConvert_* entries below are looked up
     by MatConvert() when converting to/from the corresponding type */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6879 
6880 /*@C
6881      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6882          and "off-diagonal" part of the matrix in CSR format.
6883 
6884    Collective
6885 
6886    Input Parameters:
6887 +  comm - MPI communicator
6888 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6889 .  n - This value should be the same as the local size used in creating the
6890        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6891        calculated if N is given) For square matrices n is almost always m.
6892 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
6893 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
6894 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6895 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6896 .   a - matrix values
6897 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6898 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6899 -   oa - matrix values
6900 
6901    Output Parameter:
6902 .   mat - the matrix
6903 
6904    Level: advanced
6905 
6906    Notes:
6907        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6908        must free the arrays once the matrix has been destroyed and not before.
6909 
6910        The i and j indices are 0 based
6911 
6912        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6913 
6914        This sets local rows and cannot be used to set off-processor values.
6915 
6916        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6917        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6918        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6919        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6920        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6921        communication if it is known that only local entries will be set.
6922 
6923 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6924           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6925 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Validate the caller-provided CSR arrays before wiring them into the matrix */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* The user arrays already define the full pattern; skip the usual preallocation path */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap (not copy) the caller's arrays as the diagonal (A) and off-diagonal (B) blocks;
     B is created with the full global column width, as assembly compacts it later */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Entries are purely local by contract, so suppress off-process communication
     during this assembly, then restore the default afterwards */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6954 
/* Per-product state for the backend MPIAIJ matrix-matrix algorithms; attached to the
   product matrix C and released by MatDestroy_MatMatMPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory type of the COO buffers (host or device) */

  /* customization */
  PetscBool abmerge;    /* merge the diag/off-diag of B before the product */
  PetscBool P_oth_bind; /* bind P_oth to CPU */
} MatMatMPIAIJBACKEND;
6985 
6986 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6987 {
6988   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6989   PetscInt             i;
6990 
6991   PetscFunctionBegin;
6992   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6993   PetscCall(PetscFree(mmdata->bufa));
6994   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6995   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6996   PetscCall(MatDestroy(&mmdata->P_oth));
6997   PetscCall(MatDestroy(&mmdata->Bloc));
6998   PetscCall(PetscSFDestroy(&mmdata->sf));
6999   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7000   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7001   PetscCall(PetscFree(mmdata->own[0]));
7002   PetscCall(PetscFree(mmdata->own));
7003   PetscCall(PetscFree(mmdata->off[0]));
7004   PetscCall(PetscFree(mmdata->off));
7005   PetscCall(PetscFree(mmdata));
7006   PetscFunctionReturn(PETSC_SUCCESS);
7007 }
7008 
7009 /* Copy selected n entries with indices in idx[] of A to v[].
7010    If idx is NULL, copy the whole data array of A to v[]
7011  */
7012 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7013 {
7014   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7015 
7016   PetscFunctionBegin;
7017   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7018   if (f) {
7019     PetscCall((*f)(A, n, idx, v));
7020   } else {
7021     const PetscScalar *vv;
7022 
7023     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7024     if (n && idx) {
7025       PetscScalar    *w  = v;
7026       const PetscInt *oi = idx;
7027       PetscInt        j;
7028 
7029       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7030     } else {
7031       PetscCall(PetscArraycpy(v, vv, n));
7032     }
7033     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7034   }
7035   PetscFunctionReturn(PETSC_SUCCESS);
7036 }
7037 
/* Numeric phase: recompute the intermediate sequential products mp[], gather their
   nonzero values into the COO buffers (local values into coo_v, values destined for
   other processes into coo_w, then scattered to the tail of coo_v through sf), and
   assemble C with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* reusesym is only true on the first numeric call right after an API-user symbolic
     phase (the temporaries are still fresh); clear it so later calls refresh them */
  mmdata->reusesym = PETSC_FALSE;

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* split each mp[i]'s values between on-process (own[] indices -> coo_v) and
     off-process (off[] indices -> coo_w) destinations; temporaries (mptmp) are skipped */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* remote contributions land after the n_d locally produced values, matching the
       (i,j) layout built in the symbolic phase */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7082 
7083 /* Support for Pt * A, A * P, or Pt * A * P */
7084 #define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase for C = A*P, Pt*A (AtB) or Pt*A*P on MPIAIJ matrices, implemented by
   decomposing the parallel product into at most MAX_NUMBER_INTERMEDIATE sequential
   products mp[] of the diagonal/off-diagonal blocks, then preassembling C in COO form.
   For each mp[i], rmapt/cmapt record how its local row/col indices map to global
   indices of C, and mptmp marks products that only feed later products (their values
   are never inserted directly). The numeric phase is MatProductNumeric_MPIAIJBACKEND. */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* with a symmetric A, At*B can be computed as the cheaper A*B */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine C's sizes and whether values will have to be scattered to other processes */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE;

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* create the chain of intermediate sequential products; each gets a "backend_p<i>_"
     options prefix appended so it can be customized individually */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      /* mp[1] = A_off * P_oth is only an input to mp[2] below; its values are not inserted */
      mptmp[cp] = PETSC_TRUE;
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* build the star forest that routes off-process (i,j,v) triples to their owners */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF is still created so that the numeric phase and destroy can use it uniformly */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7579 
/* Select the backend symbolic routine for AB/AtB/PtAP products.
   Without device support the backend path is always taken; with device support it is
   taken only when A and B have the same type, neither is bound to the CPU, and the
   user has not requested the CPU code path via the *_backend_cpu options.
   Falls back to the plain MPIAIJ implementation otherwise. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* option names depend on whether the user called the old API (MatMatMult etc.)
       or the MatProduct API */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7650 
7651 /*
7652    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7653 
7654    n - the number of block indices in cc[]
7655    cc - the block indices (must be large enough to contain the indices)
7656 */
7657 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7658 {
7659   PetscInt        cnt = -1, nidx, j;
7660   const PetscInt *idx;
7661 
7662   PetscFunctionBegin;
7663   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7664   if (nidx) {
7665     cnt     = 0;
7666     cc[cnt] = idx[0] / bs;
7667     for (j = 1; j < nidx; j++) {
7668       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7669     }
7670   }
7671   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7672   *n = cnt + 1;
7673   PetscFunctionReturn(PETSC_SUCCESS);
7674 }
7675 
7676 /*
7677     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7678 
7679     ncollapsed - the number of block indices
7680     collapsed - the block indices (must be large enough to contain the indices)
7681 */
7682 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7683 {
7684   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7685 
7686   PetscFunctionBegin;
7687   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7688   for (i = start + 1; i < start + bs; i++) {
7689     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7690     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7691     cprevtmp = cprev;
7692     cprev    = merged;
7693     merged   = cprevtmp;
7694   }
7695   *ncollapsed = nprev;
7696   if (collapsed) *collapsed = cprev;
7697   PetscFunctionReturn(PETSC_SUCCESS);
7698 }
7699 
7700 /*
7701    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7702 */
/*
   MatFilter_AIJ - create a new graph matrix from Gmat that keeps only the entries whose
   magnitude (of the real part) exceeds vfilter

   Input Parameters:
+  Gmat - the graph, a (MPI)AIJ matrix
-  vfilter - drop tolerance; entries with |Re(value)| <= vfilter are not inserted in the result

   Output Parameter:
.  filteredG - the filtered matrix (new matrix; the caller is responsible for destroying it)
*/
static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
{
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
  Mat                tGmat;
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c;
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  /* the filtered matrix has the same type as the input */
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
               Also, if the matrix is symmetric, can we skip this
               operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
  if (isseqaij) {
    a = Gmat;
    b = NULL;
  } else {
    /* MPIAIJ layout: A is the diagonal block, B the off-diagonal block;
       garray maps B's compacted local column indices to global column indices */
    Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
    a             = d->A;
    b             = d->B;
    garray        = d->garray;
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row = 0; row < nloc; row++) {
    PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
    d_nnz[row] = ncols;
    if (ncols > maxcols) maxcols = ncols;
    PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
  }
  if (b) {
    for (PetscInt row = 0; row < nloc; row++) {
      PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
      o_nnz[row] = ncols;
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
    }
  }
  /* only one of the two preallocation calls below takes effect, depending on the actual type of tGmat */
  PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));
  //
  /* scratch buffers for one row's surviving values/columns; maxcols bounds any row length */
  PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
  /* nnz0 counts all entries visited, nnz1 the entries kept (used only in the PetscInfo() report below) */
  nnz0 = nnz1 = 0;
  for (c = a, kk = 0; c && kk < 2; c = b, kk++) { /* first pass over the diagonal block, second (parallel case only) over the off-diagonal block */
    for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
      PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
      for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) {
          nnz1++;
          PetscInt cid = idx[jj] + Istart; //diag
          if (c != a) cid = garray[idx[jj]]; /* off-diagonal block: translate compacted local column to global */
          AA[ncol_row] = vals[jj];
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7794 
7795 /*
7796  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7797 
7798  Input Parameter:
7799  . Amat - matrix
7800  - symmetrize - make the result symmetric
7801  + scale - scale with diagonal
7802 
7803  Output Parameter:
7804  . a_Gmat - output scalar graph >= 0
7805 
7806  */
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block rows = local rows of the scalar graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* collapse each bs x bs block of Amat to a single scalar entry (a sort of block norm) */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      /* Fast path: assumes every bs x bs block is fully dense; verified row-by-row below,
         falling back to the general path (old_bs) on the first non-dense block. */
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        /* MPIAIJ: a = diagonal block, b = off-diagonal block */
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a             = d->A;
        b             = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      /* count nnz per block row (and validate the dense-block assumption) */
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols;
        for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
          nnz[brow / bs] = jj / bs;
          if (jj % bs) ok = 0; /* row length must be a multiple of bs */
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
            if (jj % bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; /* all rows of the block must start at the same column */
            if (nnz[brow / bs] != jj / bs) ok = 0;                      /* and have the same length */
            PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
          }
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            goto old_bs; /* dense-block assumption violated: use the general path */
          }
        }
      }
      /* only one of the two preallocation calls below takes effect, depending on the type of Gmat */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        /* access the SeqAIJ CSR arrays of the diagonal block directly */
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai               = aseq->i;
        n                = ai[brow + 1] - ai[brow];
        aj               = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k;
            for (int jj = 0; jj < bs; jj++) {         // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first pass: set the (global) block column indices and zero the accumulators */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          /* second pass: accumulate |values| over all bs rows of the block */
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      /* General path: no dense-block assumption; collapse rows via MatCollapseRows() and ADD_VALUES */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
       */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
         */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; /* cannot exceed the number of off-process block columns */
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      /* each scalar entry (I/bs, j/bs) accumulates |a_Ij| over its block via ADD_VALUES */
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: the graph is Amat itself (copied only when it will be modified below) */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a             = d->A;
      b             = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      /* Gmat += Gmat^T */
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    Mat Fmat = NULL; /* some silly compiler needs this */

    PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
    PetscCall(MatDestroy(&Gmat));
    Gmat = Fmat;
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
8050 
8051 /*
8052     Special version for direct calls from Fortran
8053 */
8054 #include <petsc/private/fortranimpl.h>
8055 
8056 /* Change these macros so can be used in void function */
8057 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8058 #undef PetscCall
8059 #define PetscCall(...) \
8060   do { \
8061     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8062     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8063       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8064       return; \
8065     } \
8066   } while (0)
8067 
8068 #undef SETERRQ
8069 #define SETERRQ(comm, ierr, ...) \
8070   do { \
8071     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8072     return; \
8073   } while (0)
8074 
8075 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8076   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8077 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8078   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8079 #else
8080 #endif
/*
   matsetvaluesmpiaij_ - Fortran-callable MatSetValues() for MATMPIAIJ matrices.

   All arguments are passed by reference (Fortran convention); errors are returned through
   *_ierr via the PetscCall()/SETERRQ() redefinitions above, since this is a void function.
   Locally owned rows are inserted directly into the diagonal (A) or off-diagonal (B)
   sequential blocks via the MatSetValues_SeqAIJ_{A,B}_Private() macros; rows owned by other
   processes are placed in the matrix stash for communication at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are ignored by convention */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up the search state used by the A/B insertion macros */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column lies in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue; /* negative column indices are ignored by convention */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column lies in the off-diagonal block */
            if (mat->was_assembled) {
              /* map the global column to B's compacted local numbering via the colmap */
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-diagonal column not present in the assembled pattern: expand B
                   back to global column numbering so the entry can be inserted */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for communication during MatAssemblyBegin/End() */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8194 
8195 /* Undefining these here since they were redefined from their original definition above! No
8196  * other PETSc functions should be defined past this point, as it is impossible to recover the
8197  * original definitions */
8198 #undef PetscCall
8199 #undef SETERRQ
8200